From 7e7c8494a976e03a3bb9c096530721a1c3a9a8be Mon Sep 17 00:00:00 2001 From: QuentinBisson Date: Tue, 16 Apr 2024 14:55:23 +0200 Subject: [PATCH] Auto update mimir dashboards Signed-off-by: QuentinBisson --- CHANGELOG.md | 4 + .../private/capa-agregatred-error-logs.json | 142 + .../private/capi-aggregated-error-logs.json | 142 + .../shared/private/efk-stack-app.json | 2 +- .../shared/private/loki-canary.json | 92 +- .../shared/private/loki-chunks.json | 1323 ++++--- .../shared/private/loki-deletion.json | 826 ++-- .../dashboards/shared/private/loki-logs.json | 74 +- .../private/loki-mixin-recording-rules.json | 74 +- .../shared/private/loki-operational.json | 3357 +++++++++++------ .../shared/private/loki-reads-resources.json | 1113 +++--- .../dashboards/shared/private/loki-reads.json | 890 ++--- .../shared/private/loki-retention.json | 1725 +++++---- .../shared/private/loki-writes-resources.json | 814 ++-- .../shared/private/loki-writes.json | 787 ++-- .../private/mimir-alertmanager-resources.json | 701 ++++ .../shared/private/mimir-alertmanager.json | 2448 ++++++++++++ .../private/mimir-compactor-resources.json | 46 +- .../shared/private/mimir-compactor.json | 191 +- .../shared/private/mimir-config.json | 262 ++ .../shared/private/mimir-object-store.json | 826 ++++ .../shared/private/mimir-overrides.json | 270 ++ .../private/mimir-overview-networking.json | 56 +- .../private/mimir-overview-resources.json | 2195 ++++------- .../shared/private/mimir-overview.json | 414 +- .../shared/private/mimir-queries.json | 2539 +++++++++++++ .../private/mimir-reads-networking.json | 98 +- .../shared/private/mimir-reads-resources.json | 124 +- .../shared/private/mimir-reads.json | 1493 ++++++-- .../mimir-remote-ruler-reads-networking.json | 1052 ++++++ .../mimir-remote-ruler-reads-resources.json | 986 +++++ .../private/mimir-remote-ruler-reads.json | 1687 +++++++++ .../private/mimir-rollout-progress.json | 1408 +++++++ .../shared/private/mimir-ruler.json | 122 +- 
.../shared/private/mimir-scaling.json | 365 ++ .../shared/private/mimir-slow-queries.json | 1467 +++++++ .../shared/private/mimir-tenants.json | 2665 +++++++++++++ .../shared/private/mimir-top-tenants.json | 1643 ++++++++ .../private/mimir-writes-networking.json | 56 +- .../private/mimir-writes-resources.json | 62 +- .../shared/private/mimir-writes.json | 650 +++- loki/README.md | 1 + loki/mixin.libsonnet | 2 +- mimir/.gitignore | 6 + mimir/mixin.libsonnet | 13 + mimir/update.sh | 32 + 46 files changed, 28249 insertions(+), 6996 deletions(-) create mode 100644 helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/capa-agregatred-error-logs.json create mode 100644 helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/capi-aggregated-error-logs.json create mode 100644 helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-alertmanager-resources.json create mode 100644 helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-alertmanager.json create mode 100644 helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-config.json create mode 100644 helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-object-store.json create mode 100644 helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-overrides.json create mode 100644 helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-queries.json create mode 100644 helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-remote-ruler-reads-networking.json create mode 100644 helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-remote-ruler-reads-resources.json create mode 100644 helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-remote-ruler-reads.json create mode 100644 helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-rollout-progress.json create mode 100644 
helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-scaling.json create mode 100644 helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-slow-queries.json create mode 100644 helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-tenants.json create mode 100644 helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-top-tenants.json create mode 100644 mimir/.gitignore create mode 100644 mimir/mixin.libsonnet create mode 100755 mimir/update.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index 2094ec77..1983d575 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- Add script to update all mimir mixins. + ## [3.10.4] - 2024-04-10 ### Fixed diff --git a/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/capa-agregatred-error-logs.json b/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/capa-agregatred-error-logs.json new file mode 100644 index 00000000..e78fa497 --- /dev/null +++ b/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/capa-agregatred-error-logs.json @@ -0,0 +1,142 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "aggregate error logs for specific cluster ID for CAPA controllers", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 121, + "links": [], + "panels": [ + { + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "description": "", + "gridPos": { + "h": 21, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "dedupStrategy": "none", + "enableLogDetails": true, + "prettifyLogMessage": false, + 
"showCommonLabels": false, + "showLabels": false, + "showTime": false, + "sortOrder": "Descending", + "wrapLogMessage": true + }, + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "editorMode": "builder", + "expr": "{app=~\"cluster-api-provider-aws|capa-iam-operator|aws-resolver-rules-operator|irsa-operator\"} |~ `(?i)error` |= `$cluster` | logfmt", + "queryType": "range", + "refId": "A" + } + ], + "title": "Agregated error logs for all CAPA controllers - capa-controller-manager, capa-iam-operator,aws-resolver-rules-operator,irsa-operator", + "transparent": true, + "type": "logs" + } + ], + "schemaVersion": 39, + "tags": [ + "owner:team-phoenix", + "team:phoenix", + "provider:capi" + ], + "templating": { + "list": [ + { + "current": { + }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "definition": "label_values(up{app=\"kubernetes\"},cluster_id)", + "hide": 0, + "includeAll": false, + "label": "Cluster", + "multi": false, + "name": "cluster", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(up{app=\"kubernetes\"},cluster_id)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + }, + "hide": 0, + "includeAll": false, + "multi": false, + "name": "loki_datasource", + "options": [], + "query": "loki", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "current": { + "selected": false, + "text": "Mimir", + "value": "mimir" + }, + "hide": 0, + "includeAll": false, + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "CAPA - agregated error logs for capa controllers", + "uid": 
"bdiako8tt1b7kc", + "version": 2, + "weekStart": "" +} diff --git a/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/capi-aggregated-error-logs.json b/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/capi-aggregated-error-logs.json new file mode 100644 index 00000000..efbeb26f --- /dev/null +++ b/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/capi-aggregated-error-logs.json @@ -0,0 +1,142 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "aggregate error logs for specific cluster ID from all 3 CAPI controllers", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 119, + "links": [], + "panels": [ + { + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "description": "", + "gridPos": { + "h": 21, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "dedupStrategy": "none", + "enableLogDetails": true, + "prettifyLogMessage": false, + "showCommonLabels": false, + "showLabels": false, + "showTime": false, + "sortOrder": "Descending", + "wrapLogMessage": true + }, + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "${loki_datasource}" + }, + "editorMode": "builder", + "expr": "{app=~\"cluster-api\"} |~ `(?i)error` |= `$cluster` | logfmt", + "queryType": "range", + "refId": "A" + } + ], + "title": "Agregated error logs for all CAPI controllers", + "transparent": true, + "type": "logs" + } + ], + "schemaVersion": 39, + "tags": [ + "owner:team-phoenix", + "team:phoenix", + "provider:capi" + ], + "templating": { + "list": [ + { + "current": { + }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "definition": "label_values(up{app=\"kubernetes\"},cluster_id)", + "hide": 0, + 
"includeAll": false, + "label": "Cluster", + "multi": false, + "name": "cluster", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(up{app=\"kubernetes\"},cluster_id)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + }, + "hide": 0, + "includeAll": false, + "multi": false, + "name": "loki_datasource", + "options": [], + "query": "loki", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "current": { + "selected": false, + "text": "Mimir", + "value": "mimir" + }, + "hide": 0, + "includeAll": false, + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + } + ] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "CAPI - agregated error logs for capi controllers", + "uid": "bdi7iswg81czkcasd", + "version": 9, + "weekStart": "" +} diff --git a/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/efk-stack-app.json b/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/efk-stack-app.json index ec648227..1c207307 100644 --- a/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/efk-stack-app.json +++ b/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/efk-stack-app.json @@ -5,7 +5,7 @@ "list": [] }, "description": "Elasticsearch cluster stats", - "editable": false, + "editable": true, "gnetId": 2322, "graphTooltip": 1, "id": null, diff --git a/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-canary.json b/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-canary.json index 9afbd2db..43a40254 100644 --- 
a/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-canary.json +++ b/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-canary.json @@ -12,7 +12,7 @@ }, "editable": true, "gnetId": null, - "graphTooltip": 1, + "graphTooltip": 0, "hideControls": false, "links": [ { @@ -21,9 +21,7 @@ "includeVars": true, "keepTime": true, "tags": [ - "owner:team-atlas", - "topic:observability", - "component:loki" + "loki" ], "targetBlank": false, "title": "Loki Dashboards", @@ -83,7 +81,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(count(loki_canary_entries_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}))", + "expr": "sum(count(loki_canary_entries_total{cluster=~\"$cluster\",namespace=~\"$namespace\"}))", "format": null, "instant": false, "interval": "", @@ -181,7 +179,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(increase(loki_canary_entries_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}[$__range]))", + "expr": "sum(increase(loki_canary_entries_total{cluster=~\"$cluster\",namespace=~\"$namespace\"}[$__range]))", "format": null, "instant": false, "interval": "", @@ -279,7 +277,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(increase(loki_canary_missing_entries_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}[$__range]))", + "expr": "sum(increase(loki_canary_missing_entries_total{cluster=~\"$cluster\",namespace=~\"$namespace\"}[$__range]))", "format": null, "instant": false, "interval": "", @@ -377,7 +375,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(increase(loki_canary_spot_check_missing_entries_total{cluster_id=~\"$cluster\",namespace=~\"$namespace\"}[$__range]))", + "expr": "sum(increase(loki_canary_spot_check_missing_entries_total{cluster=~\"$cluster\",namespace=~\"$namespace\"}[$__range]))", "format": null, "instant": false, "interval": "", @@ -475,7 +473,7 @@ "steppedLine": false, "targets": [ { - "expr": 
"sum(increase(loki_canary_spot_check_entries_total{cluster_id=~\"$cluster\",namespace=~\"$namespace\"}[$__range]))", + "expr": "sum(increase(loki_canary_spot_check_entries_total{cluster=~\"$cluster\",namespace=~\"$namespace\"}[$__range]))", "format": null, "instant": false, "interval": "", @@ -573,7 +571,7 @@ "steppedLine": false, "targets": [ { - "expr": "((sum(loki_canary_metric_test_expected{cluster_id=~\"$cluster\",namespace=~\"$namespace\"}) - sum(loki_canary_metric_test_actual{cluster_id=~\"$cluster\",namespace=~\"$namespace\"}))/(sum(loki_canary_metric_test_actual{cluster_id=~\"$cluster\",namespace=~\"$namespace\"}))) * 100", + "expr": "((sum(loki_canary_metric_test_expected{cluster=~\"$cluster\",namespace=~\"$namespace\"}) - sum(loki_canary_metric_test_actual{cluster=~\"$cluster\",namespace=~\"$namespace\"}))/(sum(loki_canary_metric_test_actual{cluster=~\"$cluster\",namespace=~\"$namespace\"}))) * 100", "format": null, "instant": false, "interval": "", @@ -671,7 +669,7 @@ "steppedLine": false, "targets": [ { - "expr": "(sum(increase(loki_canary_missing_entries_total{cluster_id=~\"$cluster\",namespace=~\"$namespace\"}[$__range]))/sum(increase(loki_canary_entries_total{cluster_id=~\"$cluster\",namespace=~\"$namespace\"}[$__range])))*100", + "expr": "(sum(increase(loki_canary_missing_entries_total{cluster=~\"$cluster\",namespace=~\"$namespace\"}[$__range]))/sum(increase(loki_canary_entries_total{cluster=~\"$cluster\",namespace=~\"$namespace\"}[$__range])))*100", "format": null, "instant": false, "interval": "", @@ -769,7 +767,7 @@ "steppedLine": false, "targets": [ { - "expr": "(sum(increase(loki_canary_spot_check_missing_entries_total{cluster_id=~\"$cluster\",namespace=~\"$namespace\"}[$__range]))/sum(increase(loki_canary_spot_check_entries_total{cluster_id=~\"$cluster\",namespace=~\"$namespace\"}[$__range]))) * 100", + "expr": 
"(sum(increase(loki_canary_spot_check_missing_entries_total{cluster=~\"$cluster\",namespace=~\"$namespace\"}[$__range]))/sum(increase(loki_canary_spot_check_entries_total{cluster=~\"$cluster\",namespace=~\"$namespace\"}[$__range]))) * 100", "format": null, "instant": false, "interval": "", @@ -867,7 +865,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(loki_canary_metric_test_expected{cluster_id=~\"$cluster\",namespace=~\"$namespace\"})", + "expr": "sum(loki_canary_metric_test_expected{cluster=~\"$cluster\",namespace=~\"$namespace\"})", "format": null, "instant": false, "interval": "", @@ -965,7 +963,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(loki_canary_metric_test_actual{cluster_id=~\"$cluster\",namespace=~\"$namespace\"})", + "expr": "sum(loki_canary_metric_test_actual{cluster=~\"$cluster\",namespace=~\"$namespace\"})", "format": null, "instant": false, "interval": "", @@ -1063,7 +1061,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(increase(loki_canary_websocket_missing_entries_total{cluster_id=~\"$cluster\",namespace=~\"$namespace\"}[$__range]))", + "expr": "sum(increase(loki_canary_websocket_missing_entries_total{cluster=~\"$cluster\",namespace=~\"$namespace\"}[$__range]))", "format": null, "instant": false, "interval": "", @@ -1161,7 +1159,7 @@ "steppedLine": false, "targets": [ { - "expr": "(sum(increase(loki_canary_websocket_missing_entries_total{cluster_id=~\"$cluster\",namespace=~\"$namespace\"}[$__range]))/sum(increase(loki_canary_entries_total{cluster_id=~\"$cluster\",namespace=~\"$namespace\"}[$__range])))*100", + "expr": "(sum(increase(loki_canary_websocket_missing_entries_total{cluster=~\"$cluster\",namespace=~\"$namespace\"}[$__range]))/sum(increase(loki_canary_entries_total{cluster=~\"$cluster\",namespace=~\"$namespace\"}[$__range])))*100", "format": null, "instant": false, "interval": "", @@ -1244,16 +1242,20 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.95, 
sum(rate(loki_canary_response_latency_seconds_bucket{cluster_id=~\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])) by (le))", + "expr": "histogram_quantile(0.95, sum(rate(loki_canary_response_latency_seconds_bucket{cluster=~\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])) by (le))", "format": "time_series", + "intervalFactor": 2, "legendFormat": "p95", - "legendLink": null + "legendLink": null, + "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(loki_canary_response_latency_seconds_bucket{cluster_id=~\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])) by (le))", + "expr": "histogram_quantile(0.50, sum(rate(loki_canary_response_latency_seconds_bucket{cluster=~\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])) by (le))", "format": "time_series", + "intervalFactor": 2, "legendFormat": "p50", - "legendLink": null + "legendLink": null, + "step": 10 } ], "thresholds": [], @@ -1320,7 +1322,7 @@ }, "targets": [ { - "expr": "sum(rate(loki_canary_response_latency_seconds_bucket{cluster_id=~\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])) by (le)", + "expr": "sum(rate(loki_canary_response_latency_seconds_bucket{cluster=~\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])) by (le)", "format": "heatmap", "intervalFactor": 2, "legendFormat": "{{le}}", @@ -1388,16 +1390,20 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(loki_canary_spot_check_request_duration_seconds_bucket{cluster_id=~\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])) by (le))", + "expr": "histogram_quantile(0.99, sum(rate(loki_canary_spot_check_request_duration_seconds_bucket{cluster=~\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])) by (le))", "format": "time_series", + "intervalFactor": 2, "legendFormat": "p99", - "legendLink": null + "legendLink": null, + "step": 10 }, { - "expr": "histogram_quantile(0.50, 
sum(rate(loki_canary_spot_check_request_duration_seconds_bucket{cluster_id=~\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])) by (le))", + "expr": "histogram_quantile(0.50, sum(rate(loki_canary_spot_check_request_duration_seconds_bucket{cluster=~\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])) by (le))", "format": "time_series", + "intervalFactor": 2, "legendFormat": "p95", - "legendLink": null + "legendLink": null, + "step": 10 } ], "thresholds": [], @@ -1473,16 +1479,20 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(loki_canary_metric_test_request_duration_seconds_bucket{cluster_id=~\"$cluster\",namespace=~\"$namespace\"}[15m])) by (le))", + "expr": "histogram_quantile(0.99, sum(rate(loki_canary_metric_test_request_duration_seconds_bucket{cluster=~\"$cluster\",namespace=~\"$namespace\"}[15m])) by (le))", "format": "time_series", + "intervalFactor": 2, "legendFormat": "p99", - "legendLink": null + "legendLink": null, + "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(loki_canary_metric_test_request_duration_seconds_bucket{cluster_id=~\"$cluster\",namespace=~\"$namespace\"}[15m])) by (le))", + "expr": "histogram_quantile(0.50, sum(rate(loki_canary_metric_test_request_duration_seconds_bucket{cluster=~\"$cluster\",namespace=~\"$namespace\"}[15m])) by (le))", "format": "time_series", + "intervalFactor": 2, "legendFormat": "p95", - "legendLink": null + "legendLink": null, + "step": 10 } ], "thresholds": [], @@ -1558,10 +1568,12 @@ "steppedLine": false, "targets": [ { - "expr": "topk(20, (sum by (cluster_id, pod) (increase(loki_canary_spot_check_missing_entries_total{cluster_id=~\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval]))/sum by (cluster_id, pod) (increase(loki_canary_spot_check_entries_total{cluster_id=~\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])) * 100)) > 0", + "expr": "topk(20, (sum by (cluster, pod) 
(increase(loki_canary_spot_check_missing_entries_total{cluster=~\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval]))/sum by (cluster, pod) (increase(loki_canary_spot_check_entries_total{cluster=~\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])) * 100)) > 0", "format": "time_series", + "intervalFactor": 2, "legendFormat": "", - "legendLink": null + "legendLink": null, + "step": 10 } ], "thresholds": [], @@ -1637,10 +1649,12 @@ "steppedLine": false, "targets": [ { - "expr": "topk(20,(sum by (cluster_id, pod)(increase(loki_canary_missing_entries_total{cluster_id=~\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval]))/sum by (cluster_id, pod)(increase(loki_canary_entries_total{cluster_id=~\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])))*100) > 0", + "expr": "topk(20,(sum by (cluster, pod)(increase(loki_canary_missing_entries_total{cluster=~\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval]))/sum by (cluster, pod)(increase(loki_canary_entries_total{cluster=~\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])))*100) > 0", "format": "time_series", - "legendFormat": "Missing {{ cluster_id }} {{ pod }}", - "legendLink": null + "intervalFactor": 2, + "legendFormat": "Missing {{ cluster }} {{ pod }}", + "legendLink": null, + "step": 10 } ], "thresholds": [], @@ -1685,9 +1699,7 @@ "schemaVersion": 27, "style": "dark", "tags": [ - "owner:team-atlas", - "topic:observability", - "component:loki" + "loki" ], "templating": { "list": [ @@ -1697,7 +1709,7 @@ "value": "default" }, "hide": 0, - "label": "Data source", + "label": "Data Source", "name": "datasource", "options": [], "query": "prometheus", @@ -1718,7 +1730,7 @@ "multi": false, "name": "cluster", "options": [], - "query": "label_values(loki_build_info, cluster_id)", + "query": "label_values(loki_build_info, cluster)", "refresh": 1, "regex": "", "sort": 2, @@ -1741,7 +1753,7 @@ "multi": false, "name": "namespace", "options": [], - "query": 
"label_values(loki_build_info{cluster_id=~\"$cluster\"}, namespace)", + "query": "label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)", "refresh": 1, "regex": "", "sort": 2, diff --git a/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-chunks.json b/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-chunks.json index 712fbe7f..525e347c 100644 --- a/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-chunks.json +++ b/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-chunks.json @@ -13,9 +13,7 @@ "includeVars": true, "keepTime": true, "tags": [ - "owner:team-atlas", - "topic:observability", - "component:loki" + "loki" ], "targetBlank": false, "title": "Loki Dashboards", @@ -29,98 +27,156 @@ "height": "250px", "panels": [ { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "short" - }, - "overrides": [] - }, + "fill": 1, "id": 1, - "links": [], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, "span": 6, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "sum(loki_ingester_memory_chunks{cluster_id=~\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"})", + "expr": 
"sum(loki_ingester_memory_chunks{cluster_id=\"$cluster_id\", cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"})", "format": "time_series", + "intervalFactor": 2, "legendFormat": "series", - "legendLink": null + "legendLink": null, + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "Series", - "type": "timeseries" + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] }, { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "short" - }, - "overrides": [] - }, + "fill": 1, "id": 2, - "links": [], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, "span": 6, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "sum(loki_ingester_memory_chunks{cluster_id=~\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}) / 
sum(loki_ingester_memory_streams{cluster_id=~\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"})", + "expr": "sum(loki_ingester_memory_chunks{cluster_id=\"$cluster_id\", cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}) / sum(loki_ingester_memory_streams{cluster_id=\"$cluster_id\", cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"})", "format": "time_series", + "intervalFactor": 2, "legendFormat": "chunks", - "legendLink": null + "legendLink": null, + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "Chunks per series", - "type": "timeseries" + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] } ], "repeat": null, @@ -135,67 +191,81 @@ "height": "250px", "panels": [ { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "percentunit" - }, - "overrides": [] - }, + "fill": 1, "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, "links": [], "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "percentage": false, + "pointradius": 
5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, "span": 6, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_utilization_bucket{cluster_id=~\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (le)) * 1", + "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_utilization_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (le)) * 1", "format": "time_series", + "intervalFactor": 2, "legendFormat": "99th Percentile", - "refId": "A" + "refId": "A", + "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_utilization_bucket{cluster_id=~\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (le)) * 1", + "expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_utilization_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (le)) * 1", "format": "time_series", + "intervalFactor": 2, "legendFormat": "50th Percentile", - "refId": "B" + "refId": "B", + "step": 10 }, { - "expr": "sum(rate(loki_ingester_chunk_utilization_sum{cluster_id=~\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) * 1 / sum(rate(loki_ingester_chunk_utilization_count{cluster_id=~\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval]))", + "expr": "sum(rate(loki_ingester_chunk_utilization_sum{cluster_id=\"$cluster_id\", cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) * 1 / sum(rate(loki_ingester_chunk_utilization_count{cluster_id=\"$cluster_id\", cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval]))", "format": "time_series", + "intervalFactor": 2, "legendFormat": "Average", - "refId": "C" + 
"refId": "C", + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "Utilization", - "type": "timeseries", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, "yaxes": [ { - "format": "ms", + "format": "percentunit", "label": null, "logBase": 1, "max": null, @@ -213,64 +283,78 @@ ] }, { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "short" - }, - "overrides": [] - }, + "fill": 1, "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, "links": [], "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, "span": 6, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_age_seconds_bucket{cluster_id=~\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_age_seconds_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", + "intervalFactor": 2, "legendFormat": "99th Percentile", - "refId": "A" + "refId": 
"A", + "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_age_seconds_bucket{cluster_id=~\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_age_seconds_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", + "intervalFactor": 2, "legendFormat": "50th Percentile", - "refId": "B" + "refId": "B", + "step": 10 }, { - "expr": "sum(rate(loki_ingester_chunk_age_seconds_sum{cluster_id=~\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) * 1e3 / sum(rate(loki_ingester_chunk_age_seconds_count{cluster_id=~\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval]))", + "expr": "sum(rate(loki_ingester_chunk_age_seconds_sum{cluster_id=\"$cluster_id\", cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) * 1e3 / sum(rate(loki_ingester_chunk_age_seconds_count{cluster_id=\"$cluster_id\", cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval]))", "format": "time_series", + "intervalFactor": 2, "legendFormat": "Average", - "refId": "C" + "refId": "C", + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "Age", - "type": "timeseries", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, "yaxes": [ { "format": "ms", @@ -303,67 +387,81 @@ "height": "250px", "panels": [ { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": 
"never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "short" - }, - "overrides": [] - }, + "fill": 1, "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, "links": [], "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, "span": 6, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_entries_bucket{cluster_id=~\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (le)) * 1", + "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_entries_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (le)) * 1", "format": "time_series", + "intervalFactor": 2, "legendFormat": "99th Percentile", - "refId": "A" + "refId": "A", + "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_entries_bucket{cluster_id=~\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (le)) * 1", + "expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_entries_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (le)) * 1", "format": "time_series", + "intervalFactor": 2, "legendFormat": "50th Percentile", - "refId": "B" + "refId": "B", + "step": 10 }, { - "expr": "sum(rate(loki_ingester_chunk_entries_sum{cluster_id=~\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) * 1 / 
sum(rate(loki_ingester_chunk_entries_count{cluster_id=~\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval]))", + "expr": "sum(rate(loki_ingester_chunk_entries_sum{cluster_id=\"$cluster_id\", cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) * 1 / sum(rate(loki_ingester_chunk_entries_count{cluster_id=\"$cluster_id\", cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval]))", "format": "time_series", + "intervalFactor": 2, "legendFormat": "Average", - "refId": "C" + "refId": "C", + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "Log Entries Per Chunk", - "type": "timeseries", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, "yaxes": [ { - "format": "ms", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -381,51 +479,80 @@ ] }, { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "short" - }, - "overrides": [] - }, + "fill": 1, "id": 6, - "links": [], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, 
"span": 6, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "sum(rate(loki_chunk_store_index_entries_per_chunk_sum{cluster_id=~\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[5m])) / sum(rate(loki_chunk_store_index_entries_per_chunk_count{cluster_id=~\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[5m]))", + "expr": "sum(rate(loki_chunk_store_index_entries_per_chunk_sum{cluster_id=\"$cluster_id\", cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[5m])) / sum(rate(loki_chunk_store_index_entries_per_chunk_count{cluster_id=\"$cluster_id\", cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[5m]))", "format": "time_series", + "intervalFactor": 2, "legendFormat": "Index Entries", - "legendLink": null + "legendLink": null, + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "Index Entries Per Chunk", - "type": "timeseries" + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] } ], "repeat": null, @@ -440,51 +567,80 @@ "height": "250px", "panels": [ { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "short" - }, - "overrides": [] - }, + "fill": 1, "id": 7, - "links": [], - "options": { - "legend": { - "showLegend": true - }, 
- "tooltip": { - "mode": "single", - "sort": "none" - } + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, "span": 6, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "loki_ingester_flush_queue_length{cluster_id=~\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"} or cortex_ingester_flush_queue_length{cluster_id=~\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}", + "expr": "cortex_ingester_flush_queue_length{cluster_id=\"$cluster_id\", cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}", "format": "time_series", + "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null + "legendLink": null, + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "Queue Length", - "type": "timeseries" + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] }, { "aliasColors": { @@ -493,196 +649,82 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", - "OK": "#7EB26D", - "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" }, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { 
- "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, "fill": 10, "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, "linewidth": 0, "links": [], - "options": { - "legend": { - "showLegend": 
true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, "span": 6, "stack": true, + "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_ingester_chunk_age_seconds_count{cluster_id=~\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_ingester_chunk_age_seconds_count{cluster_id=\"$cluster_id\", cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", "format": "time_series", + "intervalFactor": 2, "legendFormat": "{{status}}", - "refId": "A" + "refId": "A", + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "Flush Rate", - "type": "timeseries" + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] } ], "repeat": null, @@ -697,99 +739,138 @@ "height": "250px", "panels": [ { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": 
{ - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "short" - }, - "overrides": [] - }, + "fill": 1, "id": 9, - "links": [], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, "span": 6, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "sum(rate(loki_ingester_chunks_flushed_total{cluster_id=~\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval]))", + "expr": "sum(rate(loki_ingester_chunks_flushed_total{cluster_id=\"$cluster_id\", cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval]))", "format": "time_series", + "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null + "legendLink": null, + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "Chunks Flushed/Second", - "type": "timeseries" + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] }, { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - 
"spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "short" - }, - "overrides": [] - }, + "fill": 1, "id": 10, - "links": [], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, "span": 6, "stack": true, + "steppedLine": false, "targets": [ { - "expr": "sum by (reason) (rate(loki_ingester_chunks_flushed_total{cluster_id=~\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) / ignoring(reason) group_left sum(rate(loki_ingester_chunks_flushed_total{cluster_id=~\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval]))", + "expr": "sum by (reason) (rate(loki_ingester_chunks_flushed_total{cluster_id=\"$cluster_id\", cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) / ignoring(reason) group_left sum(rate(loki_ingester_chunks_flushed_total{cluster_id=\"$cluster_id\", cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval]))", "format": "time_series", + "intervalFactor": 2, "legendFormat": "{{reason}}", - "legendLink": null + "legendLink": null, + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "Chunk Flush Reason", - "type": "timeseries", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, "yaxes": [ { "format": "short", @@ -845,7 +926,7 @@ "span": 12, "targets": 
[ { - "expr": "sum by (le) (rate(loki_ingester_chunk_utilization_bucket{cluster_id=~\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval]))", + "expr": "sum by (le) (rate(loki_ingester_chunk_utilization_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval]))", "format": "heatmap", "intervalFactor": 2, "legendFormat": "{{le}}", @@ -907,7 +988,7 @@ "span": 12, "targets": [ { - "expr": "sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster_id=~\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (le)", + "expr": "sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (le)", "format": "heatmap", "intervalFactor": 2, "legendFormat": "{{le}}", @@ -946,63 +1027,96 @@ "height": "250px", "panels": [ { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "bytes" - }, - "overrides": [] - }, + "fill": 1, "id": 13, - "links": [], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, "span": 12, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": 
"histogram_quantile(0.99, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster_id=~\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[1m])) by (le))", + "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[1m])) by (le))", "format": "time_series", + "intervalFactor": 2, "legendFormat": "p99", - "legendLink": null + "legendLink": null, + "step": 10 }, { - "expr": "histogram_quantile(0.90, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster_id=~\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[1m])) by (le))", + "expr": "histogram_quantile(0.90, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[1m])) by (le))", "format": "time_series", + "intervalFactor": 2, "legendFormat": "p90", - "legendLink": null + "legendLink": null, + "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster_id=~\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[1m])) by (le))", + "expr": "histogram_quantile(0.50, sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[1m])) by (le))", "format": "time_series", + "intervalFactor": 2, "legendFormat": "p50", - "legendLink": null + "legendLink": null, + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "Chunk Size Quantiles", - "type": "timeseries" + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + 
"logBase": 1, + "max": null, + "min": null, + "show": false + } + ] } ], "repeat": null, @@ -1017,63 +1131,96 @@ "height": "250px", "panels": [ { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "short" - }, - "overrides": [] - }, + "fill": 1, "id": 14, - "links": [], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, "span": 12, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.5, sum(rate(loki_ingester_chunk_bounds_hours_bucket{cluster_id=~\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[5m])) by (le))", + "expr": "histogram_quantile(0.5, sum(rate(loki_ingester_chunk_bounds_hours_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[5m])) by (le))", "format": "time_series", + "intervalFactor": 2, "legendFormat": "p50", - "legendLink": null + "legendLink": null, + "step": 10 }, { - "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_bounds_hours_bucket{cluster_id=~\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[5m])) by (le))", + "expr": "histogram_quantile(0.99, sum(rate(loki_ingester_chunk_bounds_hours_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", 
job=~\"$namespace/(loki|enterprise-logs)-write\"}[5m])) by (le))", "format": "time_series", + "intervalFactor": 2, "legendFormat": "p99", - "legendLink": null + "legendLink": null, + "step": 10 }, { - "expr": "sum(rate(loki_ingester_chunk_bounds_hours_sum{cluster_id=~\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[5m])) / sum(rate(loki_ingester_chunk_bounds_hours_count{cluster_id=~\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[5m]))", + "expr": "sum(rate(loki_ingester_chunk_bounds_hours_sum{cluster_id=\"$cluster_id\", cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[5m])) / sum(rate(loki_ingester_chunk_bounds_hours_count{cluster_id=\"$cluster_id\", cluster=\"$cluster\", job=~\"$namespace/(loki|enterprise-logs)-write\"}[5m]))", "format": "time_series", + "intervalFactor": 2, "legendFormat": "avg", - "legendLink": null + "legendLink": null, + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "Chunk Duration hours (end-start)", - "type": "timeseries" + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] } ], "repeat": null, @@ -1087,9 +1234,7 @@ "schemaVersion": 14, "style": "dark", "tags": [ - "owner:team-atlas", - "topic:observability", - "component:loki" + "loki" ], "templating": { "list": [ @@ -1099,7 +1244,7 @@ "value": "default" }, "hide": 0, - "label": "Data source", + "label": "Data Source", "name": "datasource", "options": [], "query": "prometheus", @@ -1107,6 +1252,24 @@ "regex": "", "type": "datasource" }, + { + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Kube 
cluster", + "multi": false, + "name": "cluster_id", + "options": [], + "query": "label_values(loki_build_info, cluster_id)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, { "allValue": null, "current": { @@ -1120,7 +1283,7 @@ "multi": false, "name": "cluster", "options": [], - "query": "label_values(loki_build_info, cluster_id)", + "query": "label_values(loki_build_info, cluster)", "refresh": 1, "regex": "", "sort": 2, @@ -1143,7 +1306,7 @@ "multi": false, "name": "namespace", "options": [], - "query": "label_values(loki_build_info{cluster_id=~\"$cluster\"}, namespace)", + "query": "label_values(loki_build_info{cluster_id=\"$cluster_id\", cluster=~\"$cluster\"}, namespace)", "refresh": 1, "regex": "", "sort": 2, diff --git a/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-deletion.json b/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-deletion.json index be85db07..dedde660 100644 --- a/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-deletion.json +++ b/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-deletion.json @@ -13,9 +13,7 @@ "includeVars": true, "keepTime": true, "tags": [ - "owner:team-atlas", - "topic:observability", - "component:loki" + "loki" ], "targetBlank": false, "title": "Loki Dashboards", @@ -61,9 +59,10 @@ "steppedLine": false, "targets": [ { - "expr": "sum(loki_compactor_pending_delete_requests_count{cluster_id=~\"$cluster\", namespace=~\"$namespace\"})", + "expr": "sum(loki_compactor_pending_delete_requests_count{cluster_id=\"$cluster_id\", cluster=~\"$cluster\", namespace=~\"$namespace\"})", "format": "time_series", "instant": true, + "intervalFactor": 2, "refId": "A" } ], @@ -136,9 +135,10 @@ "steppedLine": false, "targets": [ { - "expr": "max(loki_compactor_oldest_pending_delete_request_age_seconds{cluster_id=~\"$cluster\", 
namespace=~\"$namespace\"})", + "expr": "max(loki_compactor_oldest_pending_delete_request_age_seconds{cluster_id=\"$cluster_id\", cluster=~\"$cluster\", namespace=~\"$namespace\"})", "format": "time_series", "instant": true, + "intervalFactor": 2, "refId": "A" } ], @@ -191,145 +191,232 @@ "height": "250px", "panels": [ { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "short" - }, - "overrides": [] - }, + "fill": 1, "id": 3, - "links": [], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, "span": 4, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "(loki_compactor_delete_requests_received_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\"} or on() vector(0)) - on () (loki_compactor_delete_requests_processed_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\"} or on () vector(0))", + "expr": "(loki_compactor_delete_requests_received_total{cluster_id=\"$cluster_id\", cluster=~\"$cluster\", namespace=~\"$namespace\"} or on() vector(0)) - on () (loki_compactor_delete_requests_processed_total{cluster_id=\"$cluster_id\", cluster=~\"$cluster\", namespace=~\"$namespace\"} or on () vector(0))", "format": "time_series", + "intervalFactor": 2, "legendFormat": "in progress", - 
"legendLink": null + "legendLink": null, + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "# of Delete Requests (received - processed) ", - "type": "timeseries" + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] }, { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "short" - }, - "overrides": [] - }, + "fill": 1, "id": 4, - "links": [], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, "span": 4, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "sum(increase(loki_compactor_delete_requests_received_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}[1d]))", + "expr": "sum(increase(loki_compactor_delete_requests_received_total{cluster_id=\"$cluster_id\", cluster=~\"$cluster\", namespace=~\"$namespace\"}[1d]))", "format": "time_series", + "intervalFactor": 2, "legendFormat": 
"received", - "legendLink": null + "legendLink": null, + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "Delete Requests Received / Day", - "type": "timeseries" + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] }, { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "short" - }, - "overrides": [] - }, + "fill": 1, "id": 5, - "links": [], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, "span": 4, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "sum(increase(loki_compactor_delete_requests_processed_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}[1d]))", + "expr": "sum(increase(loki_compactor_delete_requests_processed_total{cluster_id=\"$cluster_id\", cluster=~\"$cluster\", namespace=~\"$namespace\"}[1d]))", "format": "time_series", + "intervalFactor": 2, 
"legendFormat": "processed", - "legendLink": null + "legendLink": null, + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "Delete Requests Processed / Day", - "type": "timeseries" + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] } ], "repeat": null, @@ -344,145 +431,232 @@ "height": "250px", "panels": [ { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "short" - }, - "overrides": [] - }, + "fill": 1, "id": 6, - "links": [], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, "span": 4, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster_id=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"}", + "expr": 
"node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"}", "format": "time_series", + "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null + "legendLink": null, + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "Compactor CPU usage", - "type": "timeseries" + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] }, { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "short" - }, - "overrides": [] + "fill": 1, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, - "id": 7, + "lines": true, + "linewidth": 1, "links": [], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, "span": 4, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "go_memstats_heap_inuse_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"} / 1024 / 1024 ", + "expr": 
"go_memstats_heap_inuse_bytes{cluster_id=\"$cluster_id\", cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", job=\"loki/loki-backend\"} / 1024 / 1024 ", "format": "time_series", + "intervalFactor": 2, "legendFormat": " {{pod}} ", - "legendLink": null + "legendLink": null, + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "Compactor memory usage (MiB)", - "type": "timeseries" + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] }, { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "short" - }, - "overrides": [] - }, + "fill": 1, "id": 8, - "links": [], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, "span": 4, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "loki_boltdb_shipper_compact_tables_operation_duration_seconds{cluster_id=~\"$cluster\", 
namespace=~\"$namespace\"}", + "expr": "loki_boltdb_shipper_compact_tables_operation_duration_seconds{cluster_id=\"$cluster_id\", cluster=~\"$cluster\", namespace=~\"$namespace\"}", "format": "time_series", + "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null + "legendLink": null, + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "Compaction run duration (seconds)", - "type": "timeseries" + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] } ], "repeat": null, @@ -497,98 +671,156 @@ "height": "250px", "panels": [ { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "short" - }, - "overrides": [] - }, + "fill": 1, "id": 9, - "links": [], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, "span": 6, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": 
"sum(increase(loki_compactor_load_pending_requests_attempts_total{status=\"fail\", cluster_id=~\"$cluster\", namespace=~\"$namespace\"}[1h]))", + "expr": "sum(increase(loki_compactor_load_pending_requests_attempts_total{status=\"fail\", cluster_id=\"$cluster_id\", cluster=~\"$cluster\", namespace=~\"$namespace\"}[1h]))", "format": "time_series", + "intervalFactor": 2, "legendFormat": "failures", - "legendLink": null + "legendLink": null, + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "Failures in Loading Delete Requests / Hour", - "type": "timeseries" + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] }, { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "short" - }, - "overrides": [] - }, + "fill": 1, "id": 10, - "links": [], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, "span": 6, + 
"stack": false, + "steppedLine": false, "targets": [ { - "expr": "sum(rate(loki_compactor_deleted_lines{cluster_id=~\"$cluster\",job=~\"$namespace/(loki|enterprise-logs)-read\"}[$__rate_interval])) by (user)", + "expr": "sum(rate(loki_compactor_deleted_lines{cluster_id=\"$cluster_id\", cluster=~\"$cluster\",job=~\"$namespace/(loki|enterprise-logs)-read\"}[$__rate_interval])) by (user)", "format": "time_series", + "intervalFactor": 2, "legendFormat": "{{user}}", - "legendLink": null + "legendLink": null, + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "Lines Deleted / Sec", - "type": "timeseries" + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] } ], "repeat": null, @@ -608,7 +840,7 @@ "span": 6, "targets": [ { - "expr": "{cluster_id=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"} |~ \"Started processing delete request|delete request for user marked as processed\" | logfmt | line_format \"{{.ts}} user={{.user}} delete_request_id={{.delete_request_id}} msg={{.msg}}\" ", + "expr": "{cluster_id=\"$cluster_id\", namespace=~\"$namespace\", container=\"loki\", component=\"backend\"} |~ \"Started processing delete request|delete request for user marked as processed\" | logfmt | line_format \"{{.ts}} user={{.user}} delete_request_id={{.delete_request_id}} msg={{.msg}}\" ", "refId": "A" } ], @@ -621,7 +853,7 @@ "span": 6, "targets": [ { - "expr": "{cluster_id=~\"$cluster\", namespace=~\"$namespace\", container=\"compactor\"} |~ \"delete request for user added\" | logfmt | line_format \"{{.ts}} user={{.user}} query='{{.query}}'\"", + "expr": 
"{cluster_id=\"$cluster_id\", namespace=~\"$namespace\", container=\"loki\", component=\"backend\"} |~ \"delete request for user added\" | logfmt | line_format \"{{.ts}} user={{.user}} query='{{.query}}'\"", "refId": "A" } ], @@ -640,9 +872,7 @@ "schemaVersion": 14, "style": "dark", "tags": [ - "owner:team-atlas", - "topic:observability", - "component:loki" + "loki" ], "templating": { "list": [ @@ -652,7 +882,7 @@ "value": "default" }, "hide": 0, - "label": "Data source", + "label": "Data Source", "name": "datasource", "options": [], "query": "prometheus", @@ -660,6 +890,16 @@ "regex": "", "type": "datasource" }, + { + "hide": 0, + "label": null, + "name": "loki_datasource", + "options": [], + "query": "loki", + "refresh": 1, + "regex": "", + "type": "datasource" + }, { "allValue": null, "current": { @@ -673,6 +913,24 @@ "multi": false, "name": "cluster", "options": [], + "query": "label_values(loki_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Kube cluster", + "multi": false, + "name": "cluster_id", + "options": [], "query": "label_values(loki_build_info, cluster_id)", "refresh": 1, "regex": "", @@ -696,7 +954,7 @@ "multi": false, "name": "namespace", "options": [], - "query": "label_values(loki_build_info{cluster_id=~\"$cluster\"}, namespace)", + "query": "label_values(loki_build_info{cluster_id=\"$cluster_id\", cluster=~\"$cluster\"}, namespace)", "refresh": 1, "regex": "", "sort": 2, diff --git a/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-logs.json b/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-logs.json index 233cc5d7..fd1aacac 100644 --- a/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-logs.json +++ 
b/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-logs.json @@ -14,9 +14,7 @@ "includeVars": true, "keepTime": true, "tags": [ - "owner:team-atlas", - "topic:observability", - "component:loki" + "loki" ], "targetBlank": false, "title": "Loki Dashboards", @@ -65,7 +63,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(go_goroutines{cluster=~\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"})", + "expr": "sum(go_goroutines{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"})", "refId": "A" } ], @@ -79,7 +77,7 @@ "sort": 0, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -151,7 +149,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(go_gc_duration_seconds{cluster=~\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"}) by (quantile)", + "expr": "sum(go_gc_duration_seconds{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"}) by (quantile)", "legendFormat": "{{quantile}}", "refId": "A" } @@ -166,7 +164,7 @@ "sort": 0, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -238,7 +236,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\"}[5m]))", + "expr": "sum(rate(container_cpu_usage_seconds_total{cluster_id=\"$cluster_id\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\"}[5m]))", "refId": "A" } ], @@ -252,7 +250,7 @@ "sort": 0, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -324,7 +322,7 @@ "steppedLine": false, "targets": [ { - "expr": 
"sum(container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\"})", + "expr": "sum(container_memory_working_set_bytes{cluster_id=\"$cluster_id\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\"})", "refId": "A" } ], @@ -338,7 +336,7 @@ "sort": 0, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -410,7 +408,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"}[5m]))", + "expr": "sum(rate(container_network_transmit_bytes_total{cluster_id=\"$cluster_id\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"}[5m]))", "refId": "A" } ], @@ -424,7 +422,7 @@ "sort": 0, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -496,7 +494,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"}[5m]))", + "expr": "sum(rate(container_network_receive_bytes_total{cluster_id=\"$cluster_id\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\"}[5m]))", "refId": "A" } ], @@ -510,7 +508,7 @@ "sort": 0, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -582,7 +580,7 @@ "steppedLine": false, "targets": [ { - "expr": "increase(kube_pod_container_status_last_terminated_reason{cluster=~\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\"}[30m]) > 0", + "expr": "increase(kube_pod_container_status_last_terminated_reason{cluster_id=\"$cluster_id\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", 
container=~\"$container\"}[30m]) > 0", "legendFormat": "{{reason}}", "refId": "A" } @@ -597,7 +595,7 @@ "sort": 0, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -669,7 +667,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(promtail_custom_bad_words_total{cluster=~\"$cluster\", exported_namespace=\"$namespace\", exported_pod=~\"$deployment.*\", exported_pod=~\"$pod\", container=~\"$container\"}[5m])) by (level)", + "expr": "sum(rate(promtail_custom_bad_words_total{cluster_id=\"$cluster_id\", cluster=\"$cluster\", exported_namespace=\"$namespace\", exported_pod=~\"$deployment.*\", exported_pod=~\"$pod\", container=~\"$container\"}[5m])) by (level)", "legendFormat": "{{level}}", "refId": "A" } @@ -684,7 +682,7 @@ "sort": 0, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -773,7 +771,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate({cluster=~\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\" } |logfmt| level=\"$level\" |= \"$filter\" [5m])) by (level)", + "expr": "sum(rate({cluster_id=\"$cluster_id\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\" } |logfmt| level=~\"$level\" |= \"$filter\" [5m])) by (level)", "intervalFactor": 3, "legendFormat": "{{level}}", "refId": "A" @@ -789,7 +787,7 @@ "sort": 2, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -838,7 +836,7 @@ }, "targets": [ { - "expr": "{cluster=~\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\"} | logfmt | level=\"$level\" |= \"$filter\"", + "expr": "{cluster_id=\"$cluster_id\", namespace=\"$namespace\", pod=~\"$deployment.*\", pod=~\"$pod\", container=~\"$container\"} | logfmt | level=~\"$level\" |= \"$filter\"", 
"refId": "A" } ], @@ -853,9 +851,7 @@ "schemaVersion": 14, "style": "dark", "tags": [ - "owner:team-atlas", - "topic:observability", - "component:loki" + "loki" ], "templating": { "list": [ @@ -865,7 +861,7 @@ "value": "default" }, "hide": 0, - "label": "Data source", + "label": "Data Source", "name": "datasource", "options": [], "query": "prometheus", @@ -886,6 +882,24 @@ "multi": false, "name": "cluster", "options": [], + "query": "label_values(loki_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Kube cluster", + "multi": false, + "name": "cluster_id", + "options": [], "query": "label_values(loki_build_info, cluster_id)", "refresh": 1, "regex": "", @@ -909,7 +923,7 @@ "multi": false, "name": "namespace", "options": [], - "query": "label_values(loki_build_info{cluster_id=~\"$cluster\"}, namespace)", + "query": "label_values(loki_build_info{cluster_id=\"$cluster_id\", cluster=~\"$cluster\"}, namespace)", "refresh": 1, "regex": "", "sort": 2, @@ -939,7 +953,7 @@ "multi": false, "name": "deployment", "options": [], - "query": "label_values(kube_deployment_created{cluster_id=~\"$cluster\", namespace=\"$namespace\"}, deployment)", + "query": "label_values(kube_deployment_created{cluster_id=\"$cluster_id\", namespace=\"$namespace\"}, deployment)", "refresh": 0, "regex": "", "sort": 1, @@ -959,7 +973,7 @@ "multi": false, "name": "pod", "options": [], - "query": "label_values(kube_pod_container_info{cluster_id=~\"$cluster\", namespace=\"$namespace\", pod=~\"$deployment.*\"}, pod)", + "query": "label_values(kube_pod_container_info{cluster_id=\"$cluster_id\", namespace=\"$namespace\", pod=~\"$deployment.*\"}, pod)", "refresh": 0, "regex": "", "sort": 1, @@ -979,7 +993,7 @@ "multi": false, "name": "container", "options": [], - "query": 
"label_values(kube_pod_container_info{cluster_id=~\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\", pod=~\"$deployment.*\"}, container)", + "query": "label_values(kube_pod_container_info{cluster_id=\"$cluster_id\", namespace=\"$namespace\", pod=~\"$pod\", pod=~\"$deployment.*\"}, container)", "refresh": 0, "regex": "", "sort": 1, diff --git a/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-mixin-recording-rules.json b/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-mixin-recording-rules.json index 5ab6c442..3ba607d3 100644 --- a/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-mixin-recording-rules.json +++ b/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-mixin-recording-rules.json @@ -15,9 +15,7 @@ "includeVars": true, "keepTime": true, "tags": [ - "owner:team-atlas", - "topic:observability", - "component:loki" + "loki" ], "targetBlank": false, "title": "Loki Dashboards", @@ -27,7 +25,7 @@ "liveNow": false, "panels": [ { - "datasource": "${datasource}", + "datasource": "$datasource", "fieldConfig": { "defaults": { "color": { @@ -75,9 +73,9 @@ "pluginVersion": "8.3.0-38205pre", "targets": [ { - "datasource": "${datasource}", + "datasource": "$datasource", "exemplar": false, - "expr": "sum(loki_ruler_wal_appender_ready) by (pod, tenant) == 0", + "expr": "sum(loki_ruler_wal_appender_ready{cluster_id=\"$cluster_id\"}) by (pod, tenant) == 0", "instant": true, "interval": "", "legendFormat": "", @@ -88,7 +86,7 @@ "type": "stat" }, { - "datasource": "${datasource}", + "datasource": "$datasource", "description": "", "fieldConfig": { "defaults": { @@ -159,9 +157,9 @@ }, "targets": [ { - "datasource": "${datasource}", + "datasource": "$datasource", "exemplar": true, - "expr": "sum(rate(loki_ruler_wal_samples_appended_total{tenant=~\"${tenant}\"}[$__rate_interval])) by (tenant) > 0", + "expr": 
"sum(rate(loki_ruler_wal_samples_appended_total{cluster_id=\"$cluster_id\", tenant=~\"${tenant}\"}[$__rate_interval])) by (tenant) > 0", "interval": "", "legendFormat": "{{tenant}}", "refId": "A" @@ -171,7 +169,7 @@ "type": "timeseries" }, { - "datasource": "${datasource}", + "datasource": "$datasource", "description": "Series are unique combinations of labels", "fieldConfig": { "defaults": { @@ -242,9 +240,9 @@ }, "targets": [ { - "datasource": "${datasource}", + "datasource": "$datasource", "exemplar": true, - "expr": "sum(rate(loki_ruler_wal_storage_created_series_total{tenant=~\"${tenant}\"}[$__rate_interval])) by (tenant) > 0", + "expr": "sum(rate(loki_ruler_wal_storage_created_series_total{cluster_id=\"$cluster_id\", tenant=~\"${tenant}\"}[$__rate_interval])) by (tenant) > 0", "interval": "", "legendFormat": "{{tenant}}", "refId": "A" @@ -254,7 +252,7 @@ "type": "timeseries" }, { - "datasource": "${datasource}", + "datasource": "$datasource", "description": "Difference between highest timestamp appended to WAL and highest timestamp successfully written to remote storage", "fieldConfig": { "defaults": { @@ -325,9 +323,9 @@ }, "targets": [ { - "datasource": "${datasource}", + "datasource": "$datasource", "exemplar": true, - "expr": "loki_ruler_wal_prometheus_remote_storage_highest_timestamp_in_seconds{tenant=~\"${tenant}\"}\n- on (tenant)\n (\n loki_ruler_wal_prometheus_remote_storage_queue_highest_sent_timestamp_seconds{tenant=~\"${tenant}\"}\n or vector(0)\n )", + "expr": "loki_ruler_wal_prometheus_remote_storage_highest_timestamp_in_seconds{cluster_id=\"$cluster_id\", tenant=~\"${tenant}\"}\n- on (tenant)\n (\n loki_ruler_wal_prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster_id=\"$cluster_id\", tenant=~\"${tenant}\"}\n or vector(0)\n )", "interval": "", "legendFormat": "{{tenant}}", "refId": "A" @@ -337,7 +335,7 @@ "type": "timeseries" }, { - "datasource": "${datasource}", + "datasource": "$datasource", "description": "", "fieldConfig": 
{ "defaults": { @@ -408,9 +406,9 @@ }, "targets": [ { - "datasource": "${datasource}", + "datasource": "$datasource", "exemplar": true, - "expr": "sum(rate(loki_ruler_wal_prometheus_remote_storage_samples_total{tenant=~\"${tenant}\"}[$__rate_interval])) by (tenant) > 0", + "expr": "sum(rate(loki_ruler_wal_prometheus_remote_storage_samples_total{cluster_id=\"$cluster_id\", tenant=~\"${tenant}\"}[$__rate_interval])) by (tenant) > 0", "interval": "", "legendFormat": "{{tenant}}", "refId": "A" @@ -420,7 +418,7 @@ "type": "timeseries" }, { - "datasource": "${datasource}", + "datasource": "$datasource", "description": "\n", "fieldConfig": { "defaults": { @@ -492,9 +490,9 @@ }, "targets": [ { - "datasource": "${datasource}", + "datasource": "$datasource", "exemplar": true, - "expr": "sum by (tenant) (loki_ruler_wal_disk_size{tenant=~\"${tenant}\"})", + "expr": "sum by (tenant) (loki_ruler_wal_disk_size{cluster_id=\"$cluster_id\", tenant=~\"${tenant}\"})", "interval": "", "legendFormat": "{{tenant}}", "refId": "A" @@ -504,7 +502,7 @@ "type": "timeseries" }, { - "datasource": "${datasource}", + "datasource": "$datasource", "description": "Some number of pending samples is expected, but if remote-write is failing this value will remain high", "fieldConfig": { "defaults": { @@ -575,9 +573,9 @@ }, "targets": [ { - "datasource": "${datasource}", + "datasource": "$datasource", "exemplar": true, - "expr": "max(loki_ruler_wal_prometheus_remote_storage_samples_pending{tenant=~\"${tenant}\"}) by (tenant,pod) > 0", + "expr": "max(loki_ruler_wal_prometheus_remote_storage_samples_pending{cluster_id=\"$cluster_id\", tenant=~\"${tenant}\"}) by (tenant,pod) > 0", "interval": "", "legendFormat": "{{tenant}}", "refId": "A" @@ -592,9 +590,7 @@ "schemaVersion": 14, "style": "dark", "tags": [ - "owner:team-atlas", - "topic:observability", - "component:loki" + "loki" ], "templating": { "list": [ @@ -604,7 +600,7 @@ "value": "default" }, "hide": 0, - "label": "Data source", + "label": "Data 
Source", "name": "datasource", "options": [], "query": "prometheus", @@ -625,6 +621,24 @@ "multi": false, "name": "cluster", "options": [], + "query": "label_values(loki_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Kube cluster", + "multi": false, + "name": "cluster_id", + "options": [], "query": "label_values(loki_build_info, cluster_id)", "refresh": 1, "regex": "", @@ -648,7 +662,7 @@ "multi": false, "name": "namespace", "options": [], - "query": "label_values(loki_build_info{cluster_id=~\"$cluster\"}, namespace)", + "query": "label_values(loki_build_info{cluster_id=\"$cluster_id\", cluster=~\"$cluster\"}, namespace)", "refresh": 1, "regex": "", "sort": 2, @@ -678,7 +692,7 @@ "multi": false, "name": "tenant", "options": [], - "query": "query_result(sum by (id) (grafanacloud_logs_instance_info) and sum(label_replace(loki_tenant:active_streams{cluster_id=~\"$cluster\",namespace=\"$namespace\"},\"id\",\"$1\",\"tenant\",\"(.*)\")) by(id))", + "query": "query_result(sum by (id) (grafanacloud_logs_instance_info) and sum(label_replace(loki_tenant:active_streams{cluster_id=\"$cluster_id\", cluster=\"$cluster\",namespace=\"$namespace\"},\"id\",\"$1\",\"tenant\",\"(.*)\")) by(id))", "refresh": 0, "regex": "/\"([^\"]+)\"/", "sort": 1, diff --git a/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-operational.json b/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-operational.json index b5a84f31..6b3602a4 100644 --- a/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-operational.json +++ b/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-operational.json @@ -1,12 +1,23 @@ { "annotations": { - "list": [] + "list": [ + { + "builtIn": 1, + "datasource": { + "type": 
"grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] }, "editable": true, - "gnetId": null, + "fiscalYearStartMonth": 0, "graphTooltip": 0, - "hideControls": false, - "iteration": 1588704280892, "links": [ { "asDropdown": true, @@ -14,19 +25,21 @@ "includeVars": true, "keepTime": true, "tags": [ - "owner:team-atlas", - "topic:observability", - "component:loki" + "loki" ], "targetBlank": false, "title": "Loki Dashboards", "type": "dashboards" } ], + "liveNow": false, "panels": [ { "collapsed": false, - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, "gridPos": { "h": 1, "w": 24, @@ -35,7 +48,15 @@ }, "id": 17, "panels": [], - "targets": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "refId": "A" + } + ], "title": "Main", "type": "row" }, @@ -46,10 +67,12 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "links": [] }, "overrides": [] }, @@ -76,10 +99,11 @@ "linewidth": 1, "nullPointMode": "null", "options": { - "dataLinks": [] + "alertThreshold": true }, "panels": [], "percentage": false, + "pluginVersion": "10.1.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -89,50 +113,42 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\nlabel_replace(\n label_replace(\n rate(loki_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\"}[5m]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n\"status\", \"${1}\", \"status_code\", \"([a-z]+)\")\n)", + "datasource": { + "uid": 
"$datasource" + }, + "expr": "sum by (status) (\nlabel_replace(\n label_replace(\n rate(loki_request_duration_seconds_count{cluster_id=\"$cluster_id\", cluster=\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\"}[5m]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n\"status\", \"${1}\", \"status_code\", \"([a-z]+)\")\n)", "legendFormat": "{{status}}", "refId": "A" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Queries/Second", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 10, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -142,10 +158,12 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "links": [] }, "overrides": [] }, @@ -172,10 +190,11 @@ "linewidth": 1, "nullPointMode": "null", "options": { - "dataLinks": [] + "alertThreshold": true }, "panels": [], "percentage": false, + "pluginVersion": "10.1.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -185,50 +204,42 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\nlabel_replace(\n label_replace(\n rate(loki_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"api_prom_push|loki_api_v1_push\"}[5m]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n\"status\", \"${1}\", 
\"status_code\", \"([a-z]+)\"))", + "datasource": { + "uid": "$datasource" + }, + "expr": "sum by (status) (\nlabel_replace(\n label_replace(\n rate(loki_request_duration_seconds_count{cluster_id=\"$cluster_id\", cluster=\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"api_prom_push|loki_api_v1_push\"}[5m]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n\"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))", "legendFormat": "{{status}}", "refId": "A" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Pushes/Second", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 10, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -236,10 +247,12 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "links": [] }, "overrides": [] }, @@ -267,10 +280,11 @@ "linewidth": 1, "nullPointMode": "null", "options": { - "dataLinks": [] + "alertThreshold": true }, "panels": [], "percentage": false, + "pluginVersion": "10.1.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -280,50 +294,42 @@ "steppedLine": false, "targets": [ { - "expr": "topk(10, sum(rate(loki_distributor_lines_received_total{cluster=~\"$cluster\", namespace=\"$namespace\"}[5m])) by (tenant))", + "datasource": { + "uid": "$datasource" + }, + "expr": "topk(10, sum(rate(loki_distributor_lines_received_total{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (tenant))", "legendFormat": 
"{{tenant}}", "refId": "A" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Lines Per Tenant (top 10)", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -331,10 +337,12 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "links": [] }, "overrides": [] }, @@ -363,10 +371,11 @@ "linewidth": 1, "nullPointMode": "null", "options": { - "dataLinks": [] + "alertThreshold": true }, "panels": [], "percentage": false, + "pluginVersion": "10.1.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -376,50 +385,42 @@ "steppedLine": false, "targets": [ { - "expr": "topk(10, sum(rate(loki_distributor_bytes_received_total{cluster=~\"$cluster\", namespace=\"$namespace\"}[5m])) by (tenant)) / 1024 / 1024", + "datasource": { + "uid": "$datasource" + }, + "expr": "topk(10, sum(rate(loki_distributor_bytes_received_total{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (tenant)) / 1024 / 1024", "legendFormat": "{{tenant}}", "refId": "A" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "MBs Per Tenant (Top 10)", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": 
null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -427,10 +428,12 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "links": [] }, "overrides": [] }, @@ -457,10 +460,11 @@ "linewidth": 1, "nullPointMode": "null", "options": { - "dataLinks": [] + "alertThreshold": true }, "panels": [], "percentage": false, + "pluginVersion": "10.1.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -470,7 +474,10 @@ "steppedLine": false, "targets": [ { - "expr": "increase(kube_pod_container_status_restarts_total{cluster=~\"$cluster\", namespace=\"$namespace\"}[10m]) > 0", + "datasource": { + "uid": "$datasource" + }, + "expr": "increase(kube_pod_container_status_restarts_total{cluster_id=\"$cluster_id\", namespace=\"$namespace\"}[10m]) > 0", "hide": false, "interval": "", "legendFormat": "{{container}}-{{pod}}", @@ -478,44 +485,33 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Container Restarts", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -523,10 +519,12 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "links": [] }, "overrides": [] }, @@ -555,10 +553,11 @@ "linewidth": 1, 
"nullPointMode": "null", "options": { - "dataLinks": [] + "alertThreshold": true }, "panels": [], "percentage": false, + "pluginVersion": "10.1.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -568,60 +567,58 @@ "steppedLine": false, "targets": [ { + "datasource": { + "uid": "$datasource" + }, "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"api_prom_push|loki_api_v1_push\", cluster=~\"$cluster\"})) * 1e3", "legendFormat": ".99", "refId": "A" }, { + "datasource": { + "uid": "$datasource" + }, "expr": "histogram_quantile(0.75, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"api_prom_push|loki_api_v1_push\", cluster=~\"$cluster\"})) * 1e3", "legendFormat": ".9", "refId": "B" }, { + "datasource": { + "uid": "$datasource" + }, "expr": "histogram_quantile(0.5, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"api_prom_push|loki_api_v1_push\", cluster=~\"$cluster\"})) * 1e3", "legendFormat": ".5", "refId": "C" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Push Latency", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -629,10 +626,12 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { 
"defaults": { - "custom": {} + "links": [] }, "overrides": [] }, @@ -661,10 +660,11 @@ "linewidth": 1, "nullPointMode": "null", "options": { - "dataLinks": [] + "alertThreshold": true }, "panels": [], "percentage": false, + "pluginVersion": "10.1.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -674,60 +674,58 @@ "steppedLine": false, "targets": [ { + "datasource": { + "uid": "$datasource" + }, "expr": "histogram_quantile(0.99, sum by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"})) * 1e3", "legendFormat": ".99", "refId": "A" }, { + "datasource": { + "uid": "$datasource" + }, "expr": "histogram_quantile(0.9, sum by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"})) * 1e3", "legendFormat": ".9", "refId": "B" }, { + "datasource": { + "uid": "$datasource" + }, "expr": "histogram_quantile(0.5, sum by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", cluster=~\"$cluster\"})) * 1e3", "legendFormat": ".5", "refId": "C" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Distributor Latency", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -735,10 +733,12 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "links": [] 
}, "overrides": [] }, @@ -767,10 +767,11 @@ "linewidth": 1, "nullPointMode": "null", "options": { - "dataLinks": [] + "alertThreshold": true }, "panels": [], "percentage": false, + "pluginVersion": "10.1.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -780,52 +781,45 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (route) > 0", + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(rate(loki_request_duration_seconds_count{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (route) > 0", "interval": "", "legendFormat": "{{route}}", "refId": "A" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Distributor Success Rate", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { - "decimals": null, "format": "percentunit", "label": "", "logBase": 1, "max": "1", - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -833,10 +827,12 @@ "bars": false, "dashLength": 10, "dashes": false, - 
"datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "links": [] }, "overrides": [] }, @@ -865,10 +861,11 @@ "linewidth": 1, "nullPointMode": "null", "options": { - "dataLinks": [] + "alertThreshold": true }, "panels": [], "percentage": false, + "pluginVersion": "10.1.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -878,62 +875,60 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_id_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3", + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3", "legendFormat": ".99", "refId": "A" }, { - "expr": "histogram_quantile(0.9, sum by (le) (cluster_id_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3", + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(0.9, sum by (le) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3", "hide": false, "legendFormat": ".9", "refId": "B" }, { - "expr": "histogram_quantile(0.5, sum by (le) (cluster_id_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3", + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(0.5, sum by (le) 
(cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=\"/logproto.Pusher/Push\", cluster=~\"$cluster\"})) * 1e3", "hide": false, "legendFormat": ".5", "refId": "C" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Ingester Latency Write", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -941,10 +936,12 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "links": [] }, "overrides": [] }, @@ -973,10 +970,11 @@ "linewidth": 1, "nullPointMode": "null", "options": { - "dataLinks": [] + "alertThreshold": true }, "panels": [], "percentage": false, + "pluginVersion": "10.1.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -986,52 +984,45 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", status_code!~\"5[0-9]{2}\", route=\"/logproto.Pusher/Push\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", route=\"/logproto.Pusher/Push\"}[$__rate_interval])) by (route) > 0", + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(rate(loki_request_duration_seconds_count{cluster_id=\"$cluster_id\", cluster=\"$cluster\", 
namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", status_code!~\"5[0-9]{2}\", route=\"/logproto.Pusher/Push\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", route=\"/logproto.Pusher/Push\"}[$__rate_interval])) by (route) > 0", "interval": "", "legendFormat": "{{route}}", "refId": "A" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Ingester Success Rate Write", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { - "decimals": null, "format": "percentunit", "label": "", "logBase": 1, "max": "1", - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -1039,10 +1030,12 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "links": [] }, "overrides": [] }, @@ -1073,10 +1066,11 @@ "linewidth": 1, "nullPointMode": "null", "options": { - "dataLinks": [] + "alertThreshold": true }, "panels": [], "percentage": false, + "pluginVersion": "10.1.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -1086,60 +1080,58 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_id_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", 
cluster=~\"$cluster\"}))", + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"}))", "legendFormat": "{{route}}-.99", "refId": "A" }, { - "expr": "histogram_quantile(0.9, sum by (le,route) (cluster_id_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=~\"$cluster\"}))", + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(0.9, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"}))", "legendFormat": "{{route}}-.9", "refId": "B" }, { - "expr": "histogram_quantile(0.5, sum by (le,route) (cluster_id_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=~\"$cluster\"}))", + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(0.5, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", 
route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"}))", "legendFormat": "{{route}}-.5", "refId": "C" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Query Latency", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -1147,10 +1139,12 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "links": [] }, "overrides": [] }, @@ -1179,10 +1173,11 @@ "linewidth": 1, "nullPointMode": "null", "options": { - "dataLinks": [] + "alertThreshold": true }, "panels": [], "percentage": false, + "pluginVersion": "10.1.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -1192,60 +1187,58 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_id_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=~\"$cluster\"})) * 1e3", + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", 
route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"})) * 1e3", "legendFormat": ".99-{{route}}", "refId": "A" }, { - "expr": "histogram_quantile(0.9, sum by (le,route) (cluster_id_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=~\"$cluster\"})) * 1e3", + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(0.9, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"})) * 1e3", "legendFormat": ".9-{{route}}", "refId": "B" }, { - "expr": "histogram_quantile(0.5, sum by (le,route) (cluster_id_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=~\"$cluster\"})) * 1e3", + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(0.5, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"api_prom_query|api_prom_labels|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_label|loki_api_v1_label_name_values\", cluster=\"$cluster\"})) * 1e3", "legendFormat": ".5-{{route}}", "refId": "C" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Querier Latency", 
"tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -1253,10 +1246,12 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "links": [] }, "overrides": [] }, @@ -1285,10 +1280,11 @@ "linewidth": 1, "nullPointMode": "null", "options": { - "dataLinks": [] + "alertThreshold": true }, "panels": [], "percentage": false, + "pluginVersion": "10.1.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -1298,52 +1294,45 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\"}[$__rate_interval])) by (route) > 0", + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(rate(loki_request_duration_seconds_count{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\"}[$__rate_interval])) by (route) > 0", "interval": "", "legendFormat": "{{route}}", "refId": "A" } ], 
"thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Querier Success Rate", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { - "decimals": null, "format": "percentunit", "label": "", "logBase": 1, "max": "1", - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -1351,11 +1340,13 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "description": "", "fieldConfig": { "defaults": { - "custom": {} + "links": [] }, "overrides": [] }, @@ -1384,10 +1375,11 @@ "linewidth": 1, "nullPointMode": "null", "options": { - "dataLinks": [] + "alertThreshold": true }, "panels": [], "percentage": false, + "pluginVersion": "10.1.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -1397,60 +1389,58 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_id_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=~\"$cluster\"})) * 1e3", + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=\"$cluster\"})) * 1e3", "legendFormat": ".99-{{route}}", "refId": "A" }, { - "expr": 
"histogram_quantile(0.9, sum by (le,route) (cluster_id_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=~\"$cluster\"})) * 1e3", + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(0.9, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=\"$cluster\"})) * 1e3", "legendFormat": ".9-{{route}}", "refId": "B" }, { - "expr": "histogram_quantile(0.5, sum by (le,route) (cluster_id_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=~\"$cluster\"})) * 1e3", + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(0.5, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\", cluster=\"$cluster\"})) * 1e3", "legendFormat": ".5-{{route}}", "refId": "C" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Ingester Latency Read", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": 
true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -1458,10 +1448,12 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fieldConfig": { "defaults": { - "custom": {} + "links": [] }, "overrides": [] }, @@ -1490,10 +1482,11 @@ "linewidth": 1, "nullPointMode": "null", "options": { - "dataLinks": [] + "alertThreshold": true }, "panels": [], "percentage": false, + "pluginVersion": "10.1.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -1503,57 +1496,53 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", status_code!~\"5[0-9]{2}\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval])) by (route) > 0", + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(rate(loki_request_duration_seconds_count{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", status_code!~\"5[0-9]{2}\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\", 
job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"/logproto.Querier/Query|/logproto.Querier/Label|/logproto.Querier/Series|/logproto.Querier/QuerySample|/logproto.Querier/GetChunkIDs\"}[$__rate_interval])) by (route) > 0", "interval": "", "legendFormat": "{{route}}", "refId": "A" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Ingester Success Rate Read", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { - "decimals": null, "format": "percentunit", "label": "", "logBase": 1, "max": "1", - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { "collapsed": true, - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, "gridPos": { "h": 1, "w": 24, @@ -1567,7 +1556,15 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, "fill": 1, "fillGradient": 0, "gridPos": { @@ -1591,10 +1588,11 @@ "linewidth": 1, "nullPointMode": "null", "options": { - "dataLinks": [] + "alertThreshold": true }, "panels": [], "percentage": false, + "pluginVersion": "10.1.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -1604,56 +1602,50 @@ "steppedLine": false, "targets": [ { - "expr": "topk(10,sum by (tenant, reason) (rate(loki_discarded_samples_total{cluster=~\"$cluster\",namespace=\"$namespace\"}[1m])))", + "datasource": { + "uid": "$datasource" + }, + "expr": "topk(10,sum by (tenant, reason) (rate(loki_discarded_samples_total{cluster_id=\"$cluster_id\", cluster=\"$cluster\",namespace=\"$namespace\"}[1m])))", "interval": 
"", "legendFormat": "{{ tenant }} - {{ reason }}", "refId": "A" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Discarded Lines", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { "columns": [], - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fontSize": "100%", "gridPos": { "h": 8, @@ -1662,7 +1654,6 @@ "y": 27 }, "id": 113, - "pageSize": null, "panels": [], "showHeader": true, "sort": { @@ -1680,7 +1671,6 @@ { "alias": "", "align": "auto", - "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", @@ -1697,7 +1687,6 @@ { "alias": "", "align": "auto", - "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", @@ -1714,7 +1703,6 @@ { "alias": "", "align": "right", - "colorMode": null, "colors": [ "rgba(245, 54, 54, 0.9)", "rgba(237, 129, 40, 0.89)", @@ -1729,7 +1717,10 @@ ], "targets": [ { - "expr": "topk(10, sum by (tenant, reason) (sum_over_time(increase(loki_discarded_samples_total{cluster=~\"$cluster\",namespace=\"$namespace\"}[1m])[$__range:1m])))", + "datasource": { + "uid": "$datasource" + }, + "expr": "topk(10, sum by (tenant, reason) (sum_over_time(increase(loki_discarded_samples_total{cluster_id=\"$cluster_id\", cluster=\"$cluster\",namespace=\"$namespace\"}[1m])[$__range:1m])))", "format": "table", "instant": true, "interval": "", @@ -1737,20 +1728,29 @@ "refId": "A" } ], - "timeFrom": null, - "timeShift": null, "title": "Discarded Lines Per Interval", "transform": "table", "type": "table-old" } 
], - "targets": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "refId": "A" + } + ], "title": "Limits", "type": "row" }, { "collapsed": true, - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, "gridPos": { "h": 1, "w": 24, @@ -1764,14 +1764,22 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 6, "x": 0, - "y": 28 + "y": 3 }, "hiddenSeries": false, "id": 26, @@ -1788,10 +1796,11 @@ "linewidth": 1, "nullPointMode": "null", "options": { - "dataLinks": [] + "alertThreshold": true }, "panels": [], "percentage": false, + "pluginVersion": "10.1.5", "pointradius": 1, "points": true, "renderer": "flot", @@ -1801,51 +1810,43 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(loki|enterprise-logs)-write.*\"}[$__rate_interval]))", + "datasource": { + "uid": "$datasource" + }, + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster_id=\"$cluster_id\", namespace=~\"$namespace\", pod=~\"(loki|enterprise-logs)-write.*\"}[$__rate_interval]))", "intervalFactor": 3, "legendFormat": "{{pod}}", "refId": "A" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "CPU Usage", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - 
"alignLevel": null + "align": false } }, { @@ -1853,14 +1854,22 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 6, "x": 6, - "y": 28 + "y": 3 }, "hiddenSeries": false, "id": 27, @@ -1879,10 +1888,11 @@ "linewidth": 1, "nullPointMode": "null", "options": { - "dataLinks": [] + "alertThreshold": true }, "panels": [], "percentage": false, + "pluginVersion": "10.1.5", "pointradius": 1, "points": true, "renderer": "flot", @@ -1892,7 +1902,10 @@ "steppedLine": false, "targets": [ { - "expr": "go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=\"$namespace\", pod=~\"(loki|enterprise-logs)-write.*\"}", + "datasource": { + "uid": "$datasource" + }, + "expr": "go_memstats_heap_inuse_bytes{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"(loki|enterprise-logs)-write.*\"}", "instant": false, "intervalFactor": 3, "legendFormat": "{{pod}}", @@ -1900,44 +1913,33 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Memory Usage", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "bytes", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -1945,14 +1947,22 @@ "bars": true, "dashLength": 10, "dashes": false, - "datasource": "$loki_datasource", + "datasource": { + "uid": "$loki_datasource" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, "fill": 1, "fillGradient": 0, "gridPos": { 
"h": 4, "w": 12, "x": 12, - "y": 28 + "y": 3 }, "hiddenSeries": false, "id": 31, @@ -1969,10 +1979,11 @@ "linewidth": 1, "nullPointMode": "null", "options": { - "dataLinks": [] + "alertThreshold": true }, "panels": [], "percentage": false, + "pluginVersion": "10.1.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -1987,61 +1998,59 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate({cluster=~\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\"} | logfmt | level=\"error\"[1m]))", + "datasource": { + "uid": "$loki_datasource" + }, + "expr": "sum(rate({cluster_id=\"$cluster_id\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)\", component=\"write\"} | logfmt | level=\"error\"[1m]))", "refId": "A" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Error Log Rate", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": false, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { - "datasource": "$loki_datasource", + "datasource": { + "uid": "$loki_datasource" + }, "gridPos": { - "h": 18, + "h": 22, "w": 12, "x": 12, - "y": 32 + "y": 7 }, "id": 29, "options": { + "dedupStrategy": "none", + "enableLogDetails": true, + "prettifyLogMessage": false, + "showCommonLabels": false, "showLabels": false, "showTime": false, "sortOrder": "Descending", @@ -2050,12 +2059,13 @@ "panels": [], "targets": [ { - "expr": "{cluster=~\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\"} |= \"level=error\"", + "datasource": { + "uid": "$loki_datasource" + }, + "expr": 
"{cluster_id=\"$cluster_id\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)\", component=\"write\"} |= \"level=error\"", "refId": "A" } ], - "timeFrom": null, - "timeShift": null, "title": "Logs", "type": "logs" }, @@ -2064,14 +2074,22 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, "fill": 0, "fillGradient": 0, "gridPos": { "h": 7, "w": 6, "x": 0, - "y": 35 + "y": 10 }, "hiddenSeries": false, "id": 33, @@ -2088,10 +2106,11 @@ "linewidth": 1, "nullPointMode": "null", "options": { - "dataLinks": [] + "alertThreshold": true }, "panels": [], "percentage": false, + "pluginVersion": "10.1.5", "pointradius": 1, "points": false, "renderer": "flot", @@ -2101,7 +2120,10 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (route) > 0", + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(rate(loki_request_duration_seconds_count{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-write\"}[$__rate_interval])) by (route) > 0", "interval": "", "intervalFactor": 1, "legendFormat": "{{route}}", @@ -2109,44 +2131,33 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Success Rate", 
"tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -2154,14 +2165,22 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 6, "x": 6, - "y": 35 + "y": 10 }, "hiddenSeries": false, "id": 32, @@ -2178,10 +2197,11 @@ "linewidth": 1, "nullPointMode": "null", "options": { - "dataLinks": [] + "alertThreshold": true }, "panels": [], "percentage": false, + "pluginVersion": "10.1.5", "pointradius": 1, "points": false, "renderer": "flot", @@ -2191,51 +2211,43 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(loki_distributor_ingester_append_failures_total{cluster=~\"$cluster\", namespace=\"$namespace\"}[5m])) by (pod)", + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(rate(loki_distributor_ingester_append_failures_total{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (pod)", "intervalFactor": 1, "legendFormat": "{{pod}}", "refId": "A" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Append Failures By Ingester", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": 
"short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -2243,14 +2255,22 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 6, "x": 0, - "y": 42 + "y": 17 }, "hiddenSeries": false, "id": 34, @@ -2267,10 +2287,11 @@ "linewidth": 1, "nullPointMode": "null", "options": { - "dataLinks": [] + "alertThreshold": true }, "panels": [], "percentage": false, + "pluginVersion": "10.1.5", "pointradius": 1, "points": false, "renderer": "flot", @@ -2280,51 +2301,43 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(loki_distributor_bytes_received_total{cluster=~\"$cluster\", namespace=\"$namespace\"}[5m])) by (pod)", + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(rate(loki_distributor_bytes_received_total{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (pod)", "intervalFactor": 1, "legendFormat": "{{pod}}", "refId": "A" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Bytes Received/Second", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -2332,14 +2345,22 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + 
"links": [] + }, + "overrides": [] + }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 6, "x": 6, - "y": 42 + "y": 17 }, "hiddenSeries": false, "id": 35, @@ -2356,10 +2377,11 @@ "linewidth": 1, "nullPointMode": "null", "options": { - "dataLinks": [] + "alertThreshold": true }, "panels": [], "percentage": false, + "pluginVersion": "10.1.5", "pointradius": 1, "points": false, "renderer": "flot", @@ -2369,66 +2391,268 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(loki_distributor_lines_received_total{cluster=~\"$cluster\", namespace=\"$namespace\"}[5m])) by (pod)", + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(rate(loki_distributor_lines_received_total{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (pod)", "intervalFactor": 1, "legendFormat": "{{pod}}", "refId": "A" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Lines Received/Second", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + 
"pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 0, + "y": 24 + }, + "id": 120, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "count (count by (pod) (container_cpu_usage_seconds_total{cluster_id=\"$cluster_id\", namespace=~\"$namespace\", pod=~\"(loki|enterprise-logs)-write.*\"}))", + "instant": false, + "legendFormat": "write pods", + "range": true, + "refId": "A" + } + ], + "title": "Total write pods", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 5, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "always", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": 
"green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 6, + "y": 24 + }, + "id": 121, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "panels": [], + "pluginVersion": "10.1.5", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "1-\n(\n sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster_id=\"$cluster_id\", app=\"kubelet\", namespace=~\"$namespace\", persistentvolumeclaim=~\"data-loki-write-.*\"})\n /\n sum without(instance, node) (kubelet_volume_stats_capacity_bytes{cluster_id=\"$cluster_id\", app=\"kubelet\", namespace=~\"$namespace\", persistentvolumeclaim=~\"data-loki-write-.*\"})\n)\n", + "hide": false, + "instant": false, + "legendFormat": "{{persistentvolumeclaim}}", + "range": true, + "refId": "C" + } + ], + "title": "Disk Usage", + "type": "timeseries" + } + ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "refId": "A" } ], - "targets": [], "title": "Write Path", "type": "row" }, { "collapsed": true, - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 29 + "y": 28 }, "id": 104, "panels": [ @@ -2437,14 +2661,16 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 30 + "y": 38 }, "hiddenSeries": false, "id": 106, @@ -2476,51 +2702,43 @@ "steppedLine": false, "targets": [ { - "expr": "topk(10,sum by (tenant) (loki_ingester_memory_streams{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\"}))", + "datasource": 
{ + "uid": "$datasource" + }, + "expr": "topk(10,sum by (tenant) (loki_ingester_memory_streams{cluster_id=\"$cluster_id\", cluster=\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\"}))", "interval": "", "legendFormat": "{{ tenant }}", "refId": "A" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Active Streams", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -2528,14 +2746,16 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 30 + "y": 38 }, "hiddenSeries": false, "id": 108, @@ -2567,66 +2787,69 @@ "steppedLine": false, "targets": [ { - "expr": "topk(10, sum by (tenant) (rate(loki_ingester_streams_created_total{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\"}[1m]) > 0))", + "datasource": { + "uid": "$datasource" + }, + "expr": "topk(10, sum by (tenant) (rate(loki_ingester_streams_created_total{cluster_id=\"$cluster_id\", cluster=\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\"}[1m]) > 0))", "interval": "", "legendFormat": "{{ tenant }}", "refId": "A" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Streams Created/Sec", "tooltip": { "shared": false, "sort": 0, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { 
"format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } } ], - "targets": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "refId": "A" + } + ], "title": "Streams", "type": "row" }, { "collapsed": true, - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 30 + "y": 29 }, "id": 94, "panels": [ @@ -2635,14 +2858,16 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 8, "w": 12, "x": 0, - "y": 31 + "y": 39 }, "hiddenSeries": false, "id": 102, @@ -2677,64 +2902,56 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(loki_ingester_chunks_flushed_total{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\"}[1m]))", + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(rate(loki_ingester_chunks_flushed_total{cluster_id=\"$cluster_id\", cluster=\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\"}[1m]))", "interval": "", "legendFormat": "Chunks", "refId": "A" }, { - "expr": "sum(increase(loki_chunk_store_deduped_chunks_total{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"}[1m]))/sum(increase(loki_ingester_chunks_flushed_total{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"}[1m])) < 1", + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(increase(loki_chunk_store_deduped_chunks_total{cluster_id=\"$cluster_id\", cluster=\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"}[1m]))/sum(increase(loki_ingester_chunks_flushed_total{cluster_id=\"$cluster_id\", cluster=\"$cluster\", 
job=~\"($namespace)/(loki|enterprise-logs)-write\"}[1m])) < 1", "interval": "", "legendFormat": "De-Dupe Ratio", "refId": "B" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Chunks Flushed/Sec", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { - "cards": { - "cardPadding": null, - "cardRound": null - }, + "cards": {}, "color": { "cardColor": "#b4ff00", "colorScale": "sqrt", @@ -2743,12 +2960,14 @@ "mode": "spectrum" }, "dataFormat": "tsbuckets", - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "gridPos": { "h": 8, "w": 12, "x": 12, - "y": 31 + "y": 39 }, "heatmap": {}, "hideZeroBuckets": false, @@ -2761,7 +2980,10 @@ "reverseYBuckets": false, "targets": [ { - "expr": "sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\"}[1m])) by (le)", + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(rate(loki_ingester_chunk_size_bytes_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\"}[1m])) by (le)", "format": "heatmap", "instant": false, "interval": "", @@ -2769,8 +2991,6 @@ "refId": "A" } ], - "timeFrom": null, - "timeShift": null, "title": "Chunk Size Bytes", "tooltip": { "show": true, @@ -2780,34 +3000,29 @@ "xAxis": { "show": true }, - "xBucketNumber": null, - "xBucketSize": null, "yAxis": { "decimals": 0, "format": "bytes", "logBase": 1, - "max": null, - "min": null, - "show": true, - "splitFactor": null + "show": true }, - 
"yBucketBound": "auto", - "yBucketNumber": null, - "yBucketSize": null + "yBucketBound": "auto" }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fill": 7, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 0, - "y": 39 + "y": 47 }, "hiddenSeries": false, "id": 96, @@ -2837,33 +3052,31 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(reason) (rate(loki_ingester_chunks_flushed_total{cluster=~\"$cluster\",job=~\"$namespace/ingester\", namespace=~\"$namespace\"}[$__rate_interval])) / ignoring(reason) group_left sum(rate(loki_ingester_chunks_flushed_total{cluster=~\"$cluster\",job=~\"$namespace/ingester\", namespace=~\"$namespace\"}[$__rate_interval]))", + "datasource": { + "uid": "$datasource" + }, + "expr": "sum by(reason) (rate(loki_ingester_chunks_flushed_total{cluster_id=\"$cluster_id\", cluster=~\"$cluster\",job=~\"$namespace/ingester\", namespace=~\"$namespace\"}[$__rate_interval])) / ignoring(reason) group_left sum(rate(loki_ingester_chunks_flushed_total{cluster_id=\"$cluster_id\", cluster=~\"$cluster\",job=~\"$namespace/ingester\", namespace=~\"$namespace\"}[$__rate_interval]))", "interval": "", "legendFormat": "{{ reason }}" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Chunk Flush Reason %", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "percentunit", - "label": null, "logBase": 1, "max": "1", "min": "0", @@ -2871,39 +3084,32 @@ }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { - "cards": { - "cardPadding": null, - "cardRound": null - }, + "cards": {}, "color": { "cardColor": "#b4ff00", 
"colorScale": "sqrt", "colorScheme": "interpolateSpectral", "exponent": 0.5, - "max": null, - "min": null, "mode": "spectrum" }, "dataFormat": "tsbuckets", - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "gridPos": { "h": 9, "w": 12, "x": 12, - "y": 39 + "y": 47 }, "heatmap": {}, "hideZeroBuckets": true, @@ -2916,7 +3122,10 @@ "reverseYBuckets": false, "targets": [ { - "expr": "sum by (le) (rate(loki_ingester_chunk_utilization_bucket{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"}[1m]))", + "datasource": { + "uid": "$datasource" + }, + "expr": "sum by (le) (rate(loki_ingester_chunk_utilization_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"}[1m]))", "format": "heatmap", "instant": false, "interval": "", @@ -2924,8 +3133,6 @@ "refId": "A" } ], - "timeFrom": null, - "timeShift": null, "title": "Chunk Utilization", "tooltip": { "show": true, @@ -2935,34 +3142,38 @@ "xAxis": { "show": true }, - "xBucketNumber": null, - "xBucketSize": null, "yAxis": { "decimals": 0, "format": "percentunit", "logBase": 1, - "max": null, - "min": null, - "show": true, - "splitFactor": null + "show": true + }, + "yBucketBound": "auto" + } + ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" }, - "yBucketBound": "auto", - "yBucketNumber": null, - "yBucketSize": null + "refId": "A" } ], - "targets": [], "title": "Chunks", "type": "row" }, { "collapsed": true, - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 31 + "y": 30 }, "id": 64, "panels": [ @@ -2971,14 +3182,22 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 6, "x": 0, - "y": 32 
+ "y": 65 }, "hiddenSeries": false, "id": 68, @@ -2995,10 +3214,11 @@ "linewidth": 1, "nullPointMode": "null", "options": { - "dataLinks": [] + "alertThreshold": true }, "panels": [], "percentage": false, + "pluginVersion": "10.1.5", "pointradius": 1, "points": true, "renderer": "flot", @@ -3008,74 +3228,72 @@ "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(loki|enterprise-logs)-read.*\"}[$__rate_interval]))", + "datasource": { + "uid": "$datasource" + }, + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster_id=\"$cluster_id\", namespace=~\"$namespace\", pod=~\"(loki|enterprise-logs)-read.*\"}[$__rate_interval]))", "intervalFactor": 3, "legendFormat": "{{pod}}", "refId": "A" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "CPU Usage", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { "aliasColors": {}, - "bars": false, + "bars": true, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$loki_datasource" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, "fill": 1, "fillGradient": 0, "gridPos": { - "h": 7, - "w": 6, - "x": 0, - "y": 39 + "h": 3, + "w": 18, + "x": 6, + "y": 65 }, "hiddenSeries": false, - "id": 69, + "id": 65, "legend": { "avg": false, "current": false, - "hideEmpty": false, - "hideZero": false, "max": false, "min": false, "show": false, @@ -3086,86 +3304,125 @@ "linewidth": 1, 
"nullPointMode": "null", "options": { - "dataLinks": [] + "alertThreshold": true }, "panels": [], "percentage": false, - "pointradius": 1, - "points": true, + "pluginVersion": "10.1.5", + "pointradius": 2, + "points": false, "renderer": "flot", - "seriesOverrides": [], + "seriesOverrides": [ + { + "alias": "{}", + "color": "#F2495C" + } + ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=\"$namespace\", pod=~\"(loki|enterprise-logs)-read.*\"}", - "instant": false, - "intervalFactor": 3, - "legendFormat": "{{pod}}", + "datasource": { + "uid": "$loki_datasource" + }, + "expr": "sum(rate({cluster_id=\"$cluster_id\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)\", component=\"read\"} | logfmt | level=\"error\"[1m]))", "refId": "A" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, - "title": "Memory Usage", + "title": "Error Log Rate", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, - "show": true, + "show": false, "values": [] }, "yaxes": [ { - "format": "bytes", - "label": null, + "format": "short", "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, - "show": true + "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, + { + "datasource": { + "uid": "$loki_datasource" + }, + "gridPos": { + "h": 25, + "w": 18, + "x": 6, + "y": 68 + }, + "id": 66, + "options": { + "dedupStrategy": "none", + "enableLogDetails": true, + "prettifyLogMessage": false, + "showCommonLabels": false, + "showLabels": false, + "showTime": false, + "sortOrder": "Descending", + "wrapLogMessage": true + }, + "panels": [], + "targets": [ + { + "datasource": { + "uid": "$loki_datasource" + }, + 
"expr": "{cluster_id=\"$cluster_id\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)\", component=\"read\"} |= \"level=error\"", + "refId": "A" + } + ], + "title": "Logs", + "type": "logs" + }, { "aliasColors": {}, - "bars": true, + "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$loki_datasource", + "datasource": { + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, "fill": 1, "fillGradient": 0, "gridPos": { - "h": 3, - "w": 18, - "x": 12, - "y": 32 + "h": 7, + "w": 6, + "x": 0, + "y": 72 }, "hiddenSeries": false, - "id": 65, + "id": 69, "legend": { "avg": false, "current": false, + "hideEmpty": false, + "hideZero": false, "max": false, "min": false, "show": false, @@ -3176,109 +3433,81 @@ "linewidth": 1, "nullPointMode": "null", "options": { - "dataLinks": [] + "alertThreshold": true }, "panels": [], "percentage": false, - "pointradius": 2, - "points": false, + "pluginVersion": "10.1.5", + "pointradius": 1, + "points": true, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "{}", - "color": "#F2495C" - } - ], + "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { - "expr": "sum(rate({cluster=~\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\"} | logfmt | level=\"error\"[1m]))", + "datasource": { + "uid": "$datasource" + }, + "expr": "go_memstats_heap_inuse_bytes{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"(loki|enterprise-logs)-read.*\"}", + "instant": false, + "intervalFactor": 3, + "legendFormat": "{{pod}}", "refId": "A" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, - "title": "Error Log Rate", + "title": "Memory Usage", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, - "show": 
false, + "show": true, "values": [] }, "yaxes": [ { - "format": "short", - "label": null, + "format": "bytes", "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, - "show": false + "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, - { - "datasource": "$loki_datasource", - "gridPos": { - "h": 18, - "w": 18, - "x": 12, - "y": 35 - }, - "id": 66, - "options": { - "showLabels": false, - "showTime": false, - "sortOrder": "Descending", - "wrapLogMessage": true - }, - "panels": [], - "targets": [ - { - "expr": "{cluster=~\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\"} |= \"level=error\"", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Logs", - "type": "logs" - }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, "fill": 0, "fillGradient": 0, "gridPos": { "h": 7, "w": 6, "x": 0, - "y": 46 + "y": 79 }, "hiddenSeries": false, "id": 70, @@ -3295,10 +3524,11 @@ "linewidth": 1, "nullPointMode": "null", "options": { - "dataLinks": [] + "alertThreshold": true }, "panels": [], "percentage": false, + "pluginVersion": "10.1.5", "pointradius": 1, "points": false, "renderer": "flot", @@ -3308,7 +3538,10 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=~\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\"}[$__rate_interval])) by (route) > 0", + "datasource": { + "uid": "$datasource" + }, + "expr": 
"sum(rate(loki_request_duration_seconds_count{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-read\"}[$__rate_interval])) by (route) > 0", "interval": "", "intervalFactor": 1, "legendFormat": "{{route}}", @@ -3316,54 +3549,817 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Success Rate", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "always", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + 
"value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 86 + }, + "id": 122, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "panels": [], + "pluginVersion": "10.1.5", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "count(count by(pod) (container_cpu_usage_seconds_total{cluster_id=\"$cluster_id\", namespace=~\"$namespace\", pod=~\"(loki|enterprise-logs)-read.*\"}))", + "intervalFactor": 3, + "legendFormat": "{{pod}}", + "range": true, + "refId": "A" + } + ], + "title": "Total read pods", + "type": "timeseries" + } + ], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "refId": "A" } ], - "targets": [], "title": "Read Path", "type": "row" }, { "collapsed": true, - "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 31 + }, + "id": 123, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "points", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 4, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "always", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + 
"color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 33 + }, + "id": 124, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "panels": [], + "pluginVersion": "10.1.5", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster_id=\"$cluster_id\", namespace=~\"$namespace\", pod=~\"(loki|enterprise-logs)-backend.*\"}[$__rate_interval]))", + "intervalFactor": 3, + "legendFormat": "{{pod}}", + "range": true, + "refId": "A" + } + ], + "title": "CPU Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "points", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 4, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "always", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 33 + }, + "id": 125, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": 
"desc" + } + }, + "panels": [], + "pluginVersion": "10.1.5", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "go_memstats_heap_inuse_bytes{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"(loki|enterprise-logs)-backend.*\"}", + "instant": false, + "intervalFactor": 3, + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "title": "Memory Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "loki", + "uid": "$loki_datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "{}" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#C4162A", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byType", + "options": "time" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "hidden" + } + ] + } + ] + }, + "gridPos": { + "h": 4, + "w": 12, + "x": 12, + "y": 33 + }, + "id": 127, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + 
"panels": [], + "pluginVersion": "10.1.5", + "targets": [ + { + "datasource": { + "uid": "$loki_datasource" + }, + "editorMode": "code", + "expr": "sum(rate({cluster_id=\"$cluster_id\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)\", component=\"backend\"} | logfmt | level=\"error\"[1m]))", + "queryType": "range", + "refId": "A" + } + ], + "title": "Error Log Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "loki", + "uid": "$loki_datasource" + }, + "gridPos": { + "h": 15, + "w": 12, + "x": 12, + "y": 37 + }, + "id": 128, + "options": { + "dedupStrategy": "none", + "enableLogDetails": true, + "prettifyLogMessage": false, + "showCommonLabels": false, + "showLabels": false, + "showTime": false, + "sortOrder": "Descending", + "wrapLogMessage": true + }, + "panels": [], + "targets": [ + { + "datasource": { + "uid": "$loki_datasource" + }, + "editorMode": "code", + "expr": "{cluster_id=\"$cluster_id\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)\", component=\"backend\"} |= \"level=error\"", + "queryType": "range", + "refId": "A" + } + ], + "title": "Logs", + "type": "logs" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + 
"value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 40 + }, + "id": 126, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "panels": [], + "pluginVersion": "10.1.5", + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "sum(rate(loki_request_duration_seconds_count{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-backend\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki|enterprise-logs)-backend\"}[$__rate_interval])) by (route) > 0", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{route}}", + "range": true, + "refId": "A" + } + ], + "title": "Success Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 5, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "always", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + 
"steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 6, + "y": 40 + }, + "id": 130, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "panels": [], + "pluginVersion": "10.1.5", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "editorMode": "code", + "expr": "1-\n(\n sum without(instance, node) (kubelet_volume_stats_available_bytes{cluster_id=\"$cluster_id\", app=\"kubelet\", namespace=~\"$namespace\", persistentvolumeclaim=~\"data-loki-backend-.*\"})\n /\n sum without(instance, node) (kubelet_volume_stats_capacity_bytes{cluster_id=\"$cluster_id\", app=\"kubelet\", namespace=~\"$namespace\", persistentvolumeclaim=~\"data-loki-backend-.*\"})\n)\n", + "hide": false, + "instant": false, + "legendFormat": "{{persistentvolumeclaim}}", + "range": true, + "refId": "C" + } + ], + "title": "Disk Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": 
"green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 0, + "y": 47 + }, + "id": 129, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "count (count by (pod) (container_cpu_usage_seconds_total{cluster_id=\"$cluster_id\", namespace=~\"$namespace\", pod=~\"(loki|enterprise-logs)-backend.*\"}))", + "instant": false, + "legendFormat": "write pods", + "range": true, + "refId": "A" + } + ], + "title": "Total Backend pods", + "type": "timeseries" + } + ], + "title": "Backend Path", + "type": "row" + }, + { + "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, "gridPos": { "h": 1, "w": 24, @@ -3377,14 +4373,16 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 8, "w": 24, "x": 0, - "y": 30 + "y": 38 }, "hiddenSeries": false, "id": 53, @@ -3417,63 +4415,61 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(.99, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", namespace=\"$namespace\"}[5m])) by (method, name, le, container))", + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(.99, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (method, name, le, container))", "intervalFactor": 1, "legendFormat": "{{container}}: .99-{{method}}-{{name}}", "refId": "A" }, { - "expr": "histogram_quantile(.9, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", namespace=\"$namespace\"}[5m])) 
by (method, name, le, container))", + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(.9, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (method, name, le, container))", "hide": false, "legendFormat": "{{container}}: .9-{{method}}-{{name}}", "refId": "B" }, { - "expr": "histogram_quantile(.5, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster=~\"$cluster\", namespace=\"$namespace\"}[5m])) by (method, name, le, container))", + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(.5, sum(rate(loki_memcache_request_duration_seconds_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (method, name, le, container))", "hide": false, "legendFormat": "{{container}}: .5-{{method}}-{{name}}", "refId": "C" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Latency By Method", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -3481,14 +4477,16 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 8, "w": 24, "x": 0, - "y": 38 + "y": 46 }, "hiddenSeries": false, "id": 54, @@ -3521,61 +4519,64 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(loki_memcache_request_duration_seconds_count{cluster=~\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, method, name, container)", + 
"datasource": { + "uid": "$datasource" + }, + "expr": "sum(rate(loki_memcache_request_duration_seconds_count{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, method, name, container)", "intervalFactor": 1, "legendFormat": "{{container}}: {{status_code}}-{{method}}-{{name}}", "refId": "A" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Status By Method", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } } ], - "targets": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "refId": "A" + } + ], "title": "Memcached", "type": "row" }, { "collapsed": true, - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, "gridPos": { "h": 1, "w": 24, @@ -3589,14 +4590,16 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 8, "w": 24, "x": 0, - "y": 31 + "y": 39 }, "hiddenSeries": false, "id": 55, @@ -3629,63 +4632,61 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(.99, sum(rate(loki_consul_request_duration_seconds_bucket{cluster=~\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(.99, sum(rate(loki_consul_request_duration_seconds_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, 
le))", "intervalFactor": 1, "legendFormat": ".99-{{operation}}", "refId": "A" }, { - "expr": "histogram_quantile(.9, sum(rate(loki_consul_request_duration_seconds_bucket{cluster=~\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(.9, sum(rate(loki_consul_request_duration_seconds_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", "hide": false, "legendFormat": ".9-{{operation}}", "refId": "B" }, { - "expr": "histogram_quantile(.5, sum(rate(loki_consul_request_duration_seconds_bucket{cluster=~\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(.5, sum(rate(loki_consul_request_duration_seconds_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", "hide": false, "legendFormat": ".5-{{operation}}", "refId": "C" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Latency By Operation", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -3693,14 +4694,16 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 8, "w": 24, "x": 0, - "y": 39 + "y": 47 }, "hiddenSeries": false, "id": 58, @@ -3733,61 +4736,64 @@ "steppedLine": false, "targets": [ { - "expr": 
"sum(rate(loki_consul_request_duration_seconds_bucket{cluster=~\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, status_code, method)", + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(rate(loki_consul_request_duration_seconds_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, status_code, method)", "intervalFactor": 1, "legendFormat": "{{status_code}}-{{operation}}", "refId": "A" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Status By Operation", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } } ], - "targets": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "refId": "A" + } + ], "title": "Consul", "type": "row" }, { "collapsed": true, - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, "gridPos": { "h": 1, "w": 24, @@ -3801,14 +4807,16 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 6, "x": 0, - "y": 9 + "y": 17 }, "hiddenSeries": false, "id": 41, @@ -3839,59 +4847,57 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=~\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[5m])) by (operation, le))", + "datasource": { + "uid": "$datasource" + }, + "expr": 
"histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[5m])) by (operation, le))", "intervalFactor": 1, "legendFormat": ".9", "refId": "A" }, { - "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=~\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[5m])) by (operation, le))", + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[5m])) by (operation, le))", "refId": "B" }, { - "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=~\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[5m])) by (operation, le))", + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[5m])) by (operation, le))", "refId": "C" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "MutateRows Latency", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -3899,14 +4905,16 @@ "bars": 
false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 6, "x": 6, - "y": 9 + "y": 17 }, "hiddenSeries": false, "id": 46, @@ -3937,64 +4945,62 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=~\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[5m])) by (operation, le))", + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[5m])) by (operation, le))", "interval": "", "intervalFactor": 1, "legendFormat": "99%", "refId": "A" }, { - "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=~\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[5m])) by (operation, le))", + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[5m])) by (operation, le))", "interval": "", "legendFormat": "90%", "refId": "B" }, { - "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=~\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[5m])) by (operation, le))", + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[5m])) by (operation, le))", "interval": "", 
"legendFormat": "50%", "refId": "C" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "ReadRows Latency", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -4002,14 +5008,16 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 6, "x": 12, - "y": 9 + "y": 17 }, "hiddenSeries": false, "id": 44, @@ -4040,64 +5048,62 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=~\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[5m])) by (operation, le))", + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[5m])) by (operation, le))", "interval": "", "intervalFactor": 1, "legendFormat": "99%", "refId": "A" }, { - "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=~\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[5m])) by (operation, le))", + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster_id=\"$cluster_id\", 
cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[5m])) by (operation, le))", "interval": "", "legendFormat": "90%", "refId": "B" }, { - "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=~\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[5m])) by (operation, le))", + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[5m])) by (operation, le))", "interval": "", "legendFormat": "50%", "refId": "C" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "GetTable Latency", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -4105,14 +5111,16 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 6, "x": 18, - "y": 9 + "y": 17 }, "hiddenSeries": false, "id": 45, @@ -4143,59 +5151,57 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=~\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[5m])) by (operation, le))", + "datasource": { + "uid": 
"$datasource" + }, + "expr": "histogram_quantile(.99, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[5m])) by (operation, le))", "intervalFactor": 1, "legendFormat": ".9", "refId": "A" }, { - "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=~\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[5m])) by (operation, le))", + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(.9, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[5m])) by (operation, le))", "refId": "B" }, { - "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster=~\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[5m])) by (operation, le))", + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(.5, sum(rate(loki_bigtable_request_duration_seconds_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[5m])) by (operation, le))", "refId": "C" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "ListTables Latency", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], 
"yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -4203,14 +5209,16 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 6, "x": 0, - "y": 16 + "y": 24 }, "hiddenSeries": false, "id": 47, @@ -4241,51 +5249,43 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=~\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[5m])) by (status_code)", + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/MutateRows\"}[5m])) by (status_code)", "intervalFactor": 1, "legendFormat": "{{status_code}}", "refId": "A" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "MutateRows Status", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -4293,14 +5293,16 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 6, "x": 6, - "y": 16 + "y": 24 }, "hiddenSeries": false, "id": 50, @@ -4331,51 +5333,43 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=~\"$cluster\", namespace=\"$namespace\", 
operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[5m])) by (status_code)", + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.v2.Bigtable/ReadRows\"}[5m])) by (status_code)", "intervalFactor": 1, "legendFormat": "{{status_code}}", "refId": "A" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "ReadRows Status", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -4383,14 +5377,16 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 6, "x": 12, - "y": 16 + "y": 24 }, "hiddenSeries": false, "id": 48, @@ -4421,51 +5417,43 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=~\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[5m])) by (status_code)", + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/GetTable\"}[5m])) by (status_code)", "intervalFactor": 1, "legendFormat": "{{status_code}}", "refId": "A" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": 
"GetTable Status", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -4473,14 +5461,16 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 6, "x": 18, - "y": 16 + "y": 24 }, "hiddenSeries": false, "id": 49, @@ -4511,61 +5501,64 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster=~\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[5m])) by (status_code)", + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(rate(loki_bigtable_request_duration_seconds_count{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\", operation=\"/google.bigtable.admin.v2.BigtableTableAdmin/ListTables\"}[5m])) by (status_code)", "intervalFactor": 1, "legendFormat": "{{status_code}}", "refId": "A" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "ListTables Status", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false 
} } ], - "targets": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "refId": "A" + } + ], "title": "Big Table", "type": "row" }, { "collapsed": true, - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, "gridPos": { "h": 1, "w": 24, @@ -4579,14 +5572,16 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 8, "w": 24, "x": 0, - "y": 33 + "y": 41 }, "hiddenSeries": false, "id": 61, @@ -4619,63 +5614,61 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(.99, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=~\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(.99, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", "intervalFactor": 1, "legendFormat": ".99-{{operation}}", "refId": "A" }, { - "expr": "histogram_quantile(.9, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=~\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(.9, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", "hide": false, "legendFormat": ".9-{{operation}}", "refId": "B" }, { - "expr": "histogram_quantile(.5, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster=~\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(.5, sum(rate(loki_gcs_request_duration_seconds_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", 
"hide": false, "legendFormat": ".5-{{operation}}", "refId": "C" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Latency By Operation", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -4683,14 +5676,16 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 8, "w": 24, "x": 0, - "y": 41 + "y": 49 }, "hiddenSeries": false, "id": 62, @@ -4723,61 +5718,64 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(loki_gcs_request_duration_seconds_count{cluster=~\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)", + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(rate(loki_gcs_request_duration_seconds_count{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)", "intervalFactor": 1, "legendFormat": "{{status_code}}-{{operation}}", "refId": "A" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Status By Method", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": 
false, - "alignLevel": null + "align": false } } ], - "targets": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "refId": "A" + } + ], "title": "GCS", "type": "row" }, { "collapsed": true, - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, "gridPos": { "h": 1, "w": 24, @@ -4791,14 +5789,17 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 6, "w": 6, "x": 0, - "y": 9 + "y": 17 }, "id": 82, "legend": { @@ -4827,49 +5828,42 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(loki_dynamo_failures_total{cluster=~\"$cluster\", namespace=\"$namespace\"}[5m]))", + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "expr": "sum(rate(cortex_dynamo_failures_total{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", "refId": "A" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Failure Rate", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -4877,14 +5871,17 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 6, "w": 6, "x": 6, - "y": 9 + "y": 17 }, "id": 83, "legend": { @@ -4913,49 +5910,42 @@ "steppedLine": false, "targets": [ { - "expr": 
"sum(rate(loki_dynamo_consumed_capacity_total{cluster=~\"$cluster\", namespace=\"$namespace\"}[5m]))", + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "expr": "sum(rate(cortex_dynamo_consumed_capacity_total{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", "refId": "A" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Consumed Capacity Rate", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -4963,14 +5953,17 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 6, "w": 6, "x": 12, - "y": 9 + "y": 17 }, "id": 84, "legend": { @@ -4999,49 +5992,42 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(loki_dynamo_throttled_total{cluster=~\"$cluster\", namespace=\"$namespace\"}[5m]))", + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "expr": "sum(rate(cortex_dynamo_throttled_total{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", "refId": "A" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Throttled Rate", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": 
null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -5049,14 +6035,17 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 6, "w": 6, "x": 18, - "y": 9 + "y": 17 }, "id": 85, "legend": { @@ -5085,49 +6074,42 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(loki_dynamo_dropped_requests_total{cluster=~\"$cluster\", namespace=\"$namespace\"}[5m]))", + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "expr": "sum(rate(cortex_dynamo_dropped_requests_total{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\"}[5m]))", "refId": "A" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Dropped Rate", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -5135,14 +6117,17 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 6, "w": 6, "x": 0, - "y": 15 + "y": 23 }, "id": 86, "legend": { @@ -5171,60 +6156,61 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(.99, sum(rate(loki_dynamo_query_pages_count{cluster=~\"$cluster\", namespace=\"$namespace\"}[5m])))", + "datasource": { + "type": 
"prometheus", + "uid": "PBFA97CFB590B2093" + }, + "expr": "histogram_quantile(.99, sum(rate(cortex_dynamo_query_pages_count{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", "legendFormat": ".99", "refId": "A" }, { - "expr": "histogram_quantile(.9, sum(rate(loki_dynamo_query_pages_count{cluster=~\"$cluster\", namespace=\"$namespace\"}[5m])))", + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "expr": "histogram_quantile(.9, sum(rate(cortex_dynamo_query_pages_count{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", "legendFormat": ".9", "refId": "B" }, { - "expr": "histogram_quantile(.5, sum(rate(loki_dynamo_query_pages_count{cluster=~\"$cluster\", namespace=\"$namespace\"}[5m])))", + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "expr": "histogram_quantile(.5, sum(rate(cortex_dynamo_query_pages_count{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\"}[5m])))", "legendFormat": ".5", "refId": "C" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Query Pages", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -5232,14 +6218,16 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 6, "w": 9, "x": 6, - "y": 15 + "y": 23 }, "id": 87, "interval": "", @@ -5271,63 +6259,61 @@ "steppedLine": false, "targets": [ { - "expr": 
"histogram_quantile(.99, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=~\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(.99, sum(rate(cortex_dynamo_request_duration_seconds_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", "intervalFactor": 1, "legendFormat": ".99-{{operation}}", "refId": "A" }, { - "expr": "histogram_quantile(.9, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=~\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(.9, sum(rate(cortex_dynamo_request_duration_seconds_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", "hide": false, "legendFormat": ".9-{{operation}}", "refId": "B" }, { - "expr": "histogram_quantile(.5, sum(rate(loki_dynamo_request_duration_seconds_bucket{cluster=~\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(.5, sum(rate(cortex_dynamo_request_duration_seconds_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", "hide": false, "legendFormat": ".5-{{operation}}", "refId": "C" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Latency By Operation", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + 
"align": false } }, { @@ -5335,14 +6321,16 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 6, "w": 9, "x": 15, - "y": 15 + "y": 23 }, "id": 88, "interval": "", @@ -5374,61 +6362,64 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(loki_dynamo_request_duration_seconds_count{cluster=~\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)", + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(rate(cortex_dynamo_request_duration_seconds_count{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)", "intervalFactor": 1, "legendFormat": "{{status_code}}-{{operation}}", "refId": "A" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Status By Method", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } } ], - "targets": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "refId": "A" + } + ], "title": "Dynamo", "type": "row" }, { "collapsed": true, - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, "gridPos": { "h": 1, "w": 24, @@ -5442,14 +6433,16 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 8, "w": 24, "x": 0, - "y": 10 + "y": 18 }, "id": 79, "interval": "", @@ 
-5481,63 +6474,61 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(.99, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=~\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(.99, sum(rate(loki_s3_request_duration_seconds_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", "intervalFactor": 1, "legendFormat": ".99-{{operation}}", "refId": "A" }, { - "expr": "histogram_quantile(.9, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=~\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(.9, sum(rate(loki_s3_request_duration_seconds_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", "hide": false, "legendFormat": ".9-{{operation}}", "refId": "B" }, { - "expr": "histogram_quantile(.5, sum(rate(loki_s3_request_duration_seconds_bucket{cluster=~\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(.5, sum(rate(loki_s3_request_duration_seconds_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", "hide": false, "legendFormat": ".5-{{operation}}", "refId": "C" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Latency By Operation", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": 
false, - "alignLevel": null + "align": false } }, { @@ -5545,14 +6536,16 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 8, "w": 24, "x": 0, - "y": 18 + "y": 26 }, "id": 80, "interval": "", @@ -5584,84 +6577,89 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(loki_s3_request_duration_seconds_count{cluster=~\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)", + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(rate(loki_s3_request_duration_seconds_count{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)", "intervalFactor": 1, "legendFormat": "{{status_code}}-{{operation}}", "refId": "A" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Status By Method", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } } ], - "targets": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "refId": "A" + } + ], "title": "S3", "type": "row" }, { "collapsed": true, - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 37 + "y": 38 }, - "id": 78, + "id": 117, "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fill": 1, "fillGradient": 0, "gridPos": 
{ "h": 8, "w": 24, "x": 0, - "y": 10 + "y": 18 }, - "id": 79, + "id": 118, "interval": "", "legend": { "alignAsTable": true, @@ -5691,63 +6689,61 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(.99, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=~\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(.99, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", "intervalFactor": 1, "legendFormat": ".99-{{operation}}", "refId": "A" }, { - "expr": "histogram_quantile(.9, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=~\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(.9, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", "hide": false, "legendFormat": ".9-{{operation}}", "refId": "B" }, { - "expr": "histogram_quantile(.5, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster=~\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(.5, sum(rate(loki_azure_blob_request_duration_seconds_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", "hide": false, "legendFormat": ".5-{{operation}}", "refId": "C" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Latency By Operation", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, 
"logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -5755,16 +6751,18 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 8, "w": 24, "x": 0, - "y": 18 + "y": 26 }, - "id": 80, + "id": 119, "interval": "", "legend": { "alignAsTable": true, @@ -5794,66 +6792,69 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(loki_azure_blob_request_duration_seconds_count{cluster=~\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)", + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(rate(loki_azure_blob_request_duration_seconds_count{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)", "intervalFactor": 1, "legendFormat": "{{status_code}}-{{operation}}", "refId": "A" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Status By Method", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } } ], - "targets": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "refId": "A" + } + ], "title": "Azure Blob", "type": "row" }, { "collapsed": true, - "datasource": null, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, "gridPos": { "h": 1, "w": 24, 
"x": 0, - "y": 37 + "y": 39 }, "id": 114, "panels": [ @@ -5862,14 +6863,16 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 8, "w": 24, "x": 0, - "y": 10 + "y": 18 }, "id": 115, "interval": "", @@ -5901,63 +6904,61 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", "intervalFactor": 1, "legendFormat": ".99-{{operation}}", "refId": "A" }, { - "expr": "histogram_quantile(.9, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(.9, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", "hide": false, "legendFormat": ".9-{{operation}}", "refId": "B" }, { - "expr": "histogram_quantile(.5, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster=~\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", + "datasource": { + "uid": "$datasource" + }, + "expr": "histogram_quantile(.5, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (operation, le))", "hide": false, "legendFormat": ".5-{{operation}}", "refId": "C" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Latency By Operation", "tooltip": { "shared": true, "sort": 
2, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -5965,14 +6966,16 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "$datasource", + "datasource": { + "uid": "$datasource" + }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 8, "w": 24, "x": 0, - "y": 18 + "y": 26 }, "id": 116, "interval": "", @@ -6004,136 +7007,179 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster=~\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)", + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster_id=\"$cluster_id\", cluster=\"$cluster\", namespace=\"$namespace\"}[5m])) by (status_code, operation)", "intervalFactor": 1, "legendFormat": "{{status_code}}-{{operation}}", "refId": "A" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Status By Method", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, - "type": "timeseries", + "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } } ], - "targets": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "refId": "A" + } + ], "title": 
"BoltDB Shipper", "type": "row" } ], - "refresh": "10s", - "rows": [], - "schemaVersion": 14, + "refresh": "1m", + "schemaVersion": 38, "style": "dark", "tags": [ - "owner:team-atlas", - "topic:observability", - "component:loki" + "loki" ], "templating": { "list": [ { "current": { + "selected": false, "text": "default", "value": "default" }, "hide": 0, - "label": "Data source", + "includeAll": false, + "label": "Data Source", + "multi": false, "name": "datasource", "options": [], "query": "prometheus", "refresh": 1, "regex": "", + "skipUrlSync": false, "type": "datasource" }, { + "current": { + "selected": false, + "text": "Loki", + "value": "P8E80F9AEF21F6940" + }, "hide": 0, - "label": null, + "includeAll": false, + "multi": false, "name": "loki_datasource", "options": [], "query": "loki", "refresh": 1, "regex": "", + "skipUrlSync": false, "type": "datasource" }, { - "allValue": null, "current": { - "text": "prod", - "value": "prod" + "selected": false, + "text": "loki", + "value": "loki" + }, + "datasource": { + "type": "prometheus", + "uid": "$datasource" }, - "datasource": "$datasource", + "definition": "", "hide": 0, "includeAll": false, "label": "cluster", "multi": false, "name": "cluster", "options": [], + "query": "label_values(loki_build_info, cluster)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 2, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": false, + "text": "anteater", + "value": "anteater" + }, + "datasource": { + "type": "prometheus", + "uid": "$datasource" + }, + "definition": "", + "hide": 0, + "includeAll": false, + "label": "Kube cluster", + "multi": false, + "name": "cluster_id", + "options": [], "query": "label_values(loki_build_info, cluster_id)", "refresh": 1, "regex": "", + "skipUrlSync": false, "sort": 2, "tagValuesQuery": "", - "tags": [], "tagsQuery": "", "type": "query", "useTags": false }, { - "allValue": null, "current": { - "text": "prod", 
- "value": "prod" + "selected": false, + "text": "loki", + "value": "loki" + }, + "datasource": { + "type": "prometheus", + "uid": "$datasource" }, - "datasource": "$datasource", + "definition": "", "hide": 0, "includeAll": false, "label": "namespace", "multi": false, "name": "namespace", "options": [], - "query": "label_values(loki_build_info{cluster_id=~\"$cluster\"}, namespace)", + "query": "label_values(loki_build_info{cluster_id=\"$cluster_id\", cluster=~\"$cluster\"}, namespace)", "refresh": 1, "regex": "", + "skipUrlSync": false, "sort": 2, "tagValuesQuery": "", - "tags": [], "tagsQuery": "", "type": "query", "useTags": false @@ -6172,5 +7218,6 @@ "timezone": "utc", "title": "Loki / Operational", "uid": "loki-operational", - "version": 0 + "version": 1, + "weekStart": "" } diff --git a/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-reads-resources.json b/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-reads-resources.json index e59a9492..c9f64b60 100644 --- a/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-reads-resources.json +++ b/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-reads-resources.json @@ -13,9 +13,7 @@ "includeVars": true, "keepTime": true, "tags": [ - "owner:team-atlas", - "topic:observability", - "component:loki" + "loki" ], "targetBlank": false, "title": "Loki Dashboards", @@ -29,397 +27,514 @@ "collapsed": false, "panels": [ { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - 
"properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - } - ] - }, + "fill": 1, "gridPos": {}, "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, "links": [], - "options": { - "legend": { - "showLegend": true + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "request", + "color": "#FFC000", + "fill": 0 }, - "tooltip": { - "mode": "single", - "sort": "none" + { + "alias": "limit", + "color": "#E02F44", + "fill": 0 } - }, + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster_id=\"$cluster_id\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"}[$__rate_interval]))", "format": "time_series", + "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null + "legendLink": null, + "step": 10 }, { - "expr": "min(kube_pod_container_resource_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\", resource=\"cpu\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster_id=\"$cluster_id\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\", 
resource=\"cpu\"} > 0)", "format": "time_series", + "intervalFactor": 2, "legendFormat": "request", - "legendLink": null + "legendLink": null, + "step": 10 }, { - "expr": "min(container_spec_cpu_quota{cluster_id=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"} / container_spec_cpu_period{cluster_id=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"})", + "expr": "min(container_spec_cpu_quota{cluster_id=\"$cluster_id\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"} / container_spec_cpu_period{cluster_id=\"$cluster_id\",namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"})", "format": "time_series", + "intervalFactor": 2, "legendFormat": "limit", - "legendLink": null + "legendLink": null, + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "CPU", "tooltip": { "sort": 2 }, - "type": "timeseries" + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] }, { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": 
"custom.fillOpacity", - "value": 0 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - } - ] - }, + "fill": 1, "gridPos": {}, "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, "links": [], - "options": { - "legend": { - "showLegend": true + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "request", + "color": "#FFC000", + "fill": 0 }, - "tooltip": { - "mode": "single", - "sort": "none" + { + "alias": "limit", + "color": "#E02F44", + "fill": 0 } - }, + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"})", + "expr": "max by(pod) (container_memory_working_set_bytes{cluster_id=\"$cluster_id\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"})", "format": "time_series", + "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null + "legendLink": null, + "step": 10 }, { - "expr": "min(kube_pod_container_resource_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\", resource=\"memory\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster_id=\"$cluster_id\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\", resource=\"memory\"} > 0)", "format": "time_series", + "intervalFactor": 2, "legendFormat": "request", - "legendLink": null + "legendLink": null, + "step": 10 }, { - "expr": 
"min(container_spec_memory_limit_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster_id=~\"$cluster_id\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"} > 0)", "format": "time_series", + "intervalFactor": 2, "legendFormat": "limit", - "legendLink": null + "legendLink": null, + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "Memory (workingset)", "tooltip": { "sort": 2 }, - "type": "timeseries" + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] }, { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "bytes" - }, - "overrides": [] - }, + "fill": 1, "gridPos": {}, "id": 3, - "links": [], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, "targets": [ { - 
"expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster_id=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-read\"})", + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster_id=\"$cluster_id\", cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-read\"})", "format": "time_series", + "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null + "legendLink": null, + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "Memory (go heap inuse)", "tooltip": { "sort": 2 }, - "type": "timeseries" + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] }, { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "Bps" - }, - "overrides": [] - }, + "fill": 10, "gridPos": {}, "id": 4, - "links": [], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, + "lines": true, + "linewidth": 0, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, "targets": [ { - "expr": "sum by(instance, pod, device) 
(rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", + "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster_id=\"$cluster_id\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", "format": "time_series", + "intervalFactor": 2, "legendFormat": "{{pod}} - {{device}}", - "legendLink": null + "legendLink": null, + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "Disk Writes", - "type": "timeseries" + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] }, { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "Bps" - }, - "overrides": [] - }, + "fill": 10, "gridPos": {}, "id": 5, - "links": [], - "options": { - "legend": { - "showLegend": true 
- }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, + "lines": true, + "linewidth": 0, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, "targets": [ { - "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", + "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster_id=~\"$cluster_id\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", "format": "time_series", + "intervalFactor": 2, "legendFormat": "{{pod}} - {{device}}", - "legendLink": null + "legendLink": null, + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "Disk Reads", - "type": "timeseries" + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] }, { + "aliasColors": {}, + "bars": 
false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "percentunit" - }, - "overrides": [] - }, + "fill": 1, "gridPos": {}, "id": 6, - "links": [], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\"} / kubelet_volume_stats_capacity_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{cluster_id=~\"$cluster\", namespace=~\"$namespace\",label_name=~\"(loki|enterprise-logs)-read.*\"})", + "expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster_id=~\"$cluster_id\", namespace=~\"$namespace\"} / kubelet_volume_stats_capacity_bytes{cluster_id=~\"$cluster_id\", namespace=~\"$namespace\"}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{cluster_id=~\"$cluster_id\", namespace=~\"$namespace\",label_name=~\"(loki|enterprise-logs)-read.*\"})", "format": "time_series", + "intervalFactor": 2, "legendFormat": "{{persistentvolumeclaim}}", - "legendLink": null + "legendLink": null, + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + 
"timeShift": null, "title": "Disk Space Utilization", - "type": "timeseries" + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] } ], "repeat": null, @@ -435,256 +550,280 @@ "height": "250px", "panels": [ { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - } - ] - }, + "fill": 1, "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, "links": [], - "options": { - "legend": { - "showLegend": true + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "request", + "color": "#FFC000", + "fill": 0 }, - "tooltip": { - "mode": "single", - 
"sort": "none" + { + "alias": "limit", + "color": "#E02F44", + "fill": 0 } - }, + ], + "spaceLength": 10, "span": 4, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster_id=~\"$cluster_id\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"}[$__rate_interval]))", "format": "time_series", + "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null + "legendLink": null, + "step": 10 }, { - "expr": "min(kube_pod_container_resource_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\", resource=\"cpu\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster_id=~\"$cluster_id\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\", resource=\"cpu\"} > 0)", "format": "time_series", + "intervalFactor": 2, "legendFormat": "request", - "legendLink": null + "legendLink": null, + "step": 10 }, { - "expr": "min(container_spec_cpu_quota{cluster_id=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"} / container_spec_cpu_period{cluster_id=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"})", + "expr": "min(container_spec_cpu_quota{cluster_id=~\"$cluster_id\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"} / container_spec_cpu_period{cluster_id=~\"$cluster_id\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"})", "format": "time_series", + "intervalFactor": 2, "legendFormat": "limit", - "legendLink": null + "legendLink": null, + "step": 10 } ], + "thresholds": [], + 
"timeFrom": null, + "timeShift": null, "title": "CPU", "tooltip": { "sort": 2 }, - "type": "timeseries" + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] }, { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - } - ] - }, + "fill": 1, "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, "links": [], - "options": { - "legend": { - "showLegend": true + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "request", + "color": "#FFC000", + "fill": 0 }, - "tooltip": { - "mode": "single", - "sort": "none" + { + "alias": "limit", + "color": "#E02F44", + "fill": 0 } - }, + ], + "spaceLength": 10, "span": 4, + "stack": 
false, + "steppedLine": false, "targets": [ { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"})", + "expr": "max by(pod) (container_memory_working_set_bytes{cluster_id=~\"$cluster_id\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"})", "format": "time_series", + "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null + "legendLink": null, + "step": 10 }, { - "expr": "min(kube_pod_container_resource_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\", resource=\"memory\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster_id=~\"$cluster_id\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\", resource=\"memory\"} > 0)", "format": "time_series", + "intervalFactor": 2, "legendFormat": "request", - "legendLink": null + "legendLink": null, + "step": 10 }, { - "expr": "min(container_spec_memory_limit_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster_id=~\"$cluster_id\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"} > 0)", "format": "time_series", + "intervalFactor": 2, "legendFormat": "limit", - "legendLink": null + "legendLink": null, + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "Memory (workingset)", "tooltip": { "sort": 2 }, - "type": "timeseries" + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + 
"min": null, + "show": false + } + ] }, { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "bytes" - }, - "overrides": [] - }, + "fill": 1, "id": 9, - "links": [], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, "span": 4, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster_id=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"})", + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster_id=~\"$cluster_id\", job=~\"($namespace)/(loki|enterprise-logs)-write\"})", "format": "time_series", + "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null + "legendLink": null, + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "Memory (go heap inuse)", "tooltip": { "sort": 2 }, - "type": "timeseries" + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] } ], "repeat": null, @@ -698,9 +837,7 @@ 
"schemaVersion": 14, "style": "dark", "tags": [ - "owner:team-atlas", - "topic:observability", - "component:loki" + "loki" ], "templating": { "list": [ @@ -710,7 +847,7 @@ "value": "default" }, "hide": 0, - "label": "Data source", + "label": "Data Source", "name": "datasource", "options": [], "query": "prometheus", @@ -718,6 +855,24 @@ "regex": "", "type": "datasource" }, + { + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Kube cluster", + "multi": false, + "name": "cluster_id", + "options": [], + "query": "label_values(loki_build_info, cluster_id)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, { "allValue": null, "current": { @@ -731,7 +886,7 @@ "multi": false, "name": "cluster", "options": [], - "query": "label_values(loki_build_info, cluster_id)", + "query": "label_values(loki_build_info, cluster)", "refresh": 1, "regex": "", "sort": 2, @@ -754,7 +909,7 @@ "multi": false, "name": "namespace", "options": [], - "query": "label_values(loki_build_info{cluster_id=~\"$cluster\"}, namespace)", + "query": "label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)", "refresh": 1, "regex": "", "sort": 2, diff --git a/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-reads.json b/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-reads.json index f0796ef7..47de2a3c 100644 --- a/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-reads.json +++ b/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-reads.json @@ -13,9 +13,7 @@ "includeVars": true, "keepTime": true, "tags": [ - "owner:team-atlas", - "topic:observability", - "component:loki" + "loki" ], "targetBlank": false, "title": "Loki Dashboards", @@ -35,256 +33,156 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", - "OK": "#7EB26D", - "cancel": "#A9A9A9", "error": "#E24D42", 
"success": "#7EB26D" }, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": 
"#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, "fill": 10, "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, "linewidth": 0, "links": [], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, "span": 4, "stack": true, + "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster_id=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster_id=\"$cluster_id\", cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", "format": "time_series", + "intervalFactor": 2, "legendFormat": "{{status}}", - "refId": "A" + "refId": "A", + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "QPS", - "type": "timeseries" + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + 
"buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] }, { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, + "fill": 1, "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, "links": [], "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, "span": 4, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_id_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3", + "expr": "histogram_quantile(0.99, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-read\", 
route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3", "format": "time_series", - "legendFormat": "{{ route }} 99th percentile", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{ route }} 99th Percentile", + "refId": "A", + "step": 10 }, { - "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_id_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3", + "expr": "histogram_quantile(0.50, sum by (le,route) (cluster_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"})) * 1e3", "format": "time_series", - "legendFormat": "{{ route }} 50th percentile", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "{{ route }} 50th Percentile", + "refId": "B", + "step": 10 }, { - "expr": "1e3 * sum(cluster_id_job_route:loki_request_duration_seconds_sum:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) / sum(cluster_id_job_route:loki_request_duration_seconds_count:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-read\", 
route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) ", + "expr": "1e3 * sum(cluster_job_route:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) / sum(cluster_job_route:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}) by (route) ", "format": "time_series", + "intervalFactor": 2, "legendFormat": "{{ route }} Average", - "refId": "C" + "refId": "C", + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "Latency", - "type": "timeseries", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, "yaxes": [ { "format": "ms", @@ -305,55 +203,89 @@ ] }, { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] + "custom": { + "fillOpacity": 50, + "showPoints": "never", + "stacking": { + "group": "A", + "mode": "normal" + } + } }, + "fill": 1, 
"id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, "links": [], "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, "span": 4, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}[$__rate_interval])) by (le,pod)) * 1e3", - "format": "time_series", - "interval": "1m", - "intervalFactor": 2, + "expr": "histogram_quantile(0.99,\n sum(\n rate(loki_request_duration_seconds_bucket{cluster_id=\"$cluster_id\", cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-read\", route=~\"loki_api_v1_series|api_prom_series|api_prom_query|api_prom_label|api_prom_label_name_values|loki_api_v1_query|loki_api_v1_query_range|loki_api_v1_labels|loki_api_v1_label_name_values\"}[$__rate_interval])\n ) by (pod, le)\n )\n", + "instant": false, "legendFormat": "__auto", - "refId": "A", - "step": 10 + "range": true, + "refId": "A" } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "Per Pod Latency (p99)", - "type": "timeseries" + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + 
"format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] } ], "repeat": null, @@ -374,256 +306,156 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", - "OK": "#7EB26D", - "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" }, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - 
"options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, "fill": 10, "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, "linewidth": 0, "links": [], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, "span": 4, "stack": true, + "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster_id=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-read\", operation=\"Shipper.Query\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster_id=\"$cluster_id\", cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-read\", operation=\"Shipper.Query\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", "format": "time_series", + "intervalFactor": 2, "legendFormat": "{{status}}", - "refId": "A" + "refId": "A", + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "QPS", - "type": "timeseries" + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", 
+ "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] }, { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, + "fill": 1, "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, "links": [], "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, "span": 4, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster_id=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-read\", operation=\"Shipper.Query\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster_id=\"$cluster_id\", cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-read\", operation=\"Shipper.Query\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", + "intervalFactor": 2, "legendFormat": "99th Percentile", - "refId": "A" + "refId": "A", + "step": 10 }, { - "expr": "histogram_quantile(0.50, 
sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster_id=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-read\", operation=\"Shipper.Query\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster_id=\"$cluster_id\", cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-read\", operation=\"Shipper.Query\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", + "intervalFactor": 2, "legendFormat": "50th Percentile", - "refId": "B" + "refId": "B", + "step": 10 }, { - "expr": "sum(rate(loki_boltdb_shipper_request_duration_seconds_sum{cluster_id=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-read\", operation=\"Shipper.Query\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster_id=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-read\", operation=\"Shipper.Query\"}[$__rate_interval]))", + "expr": "sum(rate(loki_boltdb_shipper_request_duration_seconds_sum{cluster_id=\"$cluster_id\", cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-read\", operation=\"Shipper.Query\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster_id=\"$cluster_id\", cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-read\", operation=\"Shipper.Query\"}[$__rate_interval]))", "format": "time_series", + "intervalFactor": 2, "legendFormat": "Average", - "refId": "C" + "refId": "C", + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "Latency", - "type": "timeseries", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, "yaxes": [ { "format": "ms", @@ -644,55 +476,89 @@ ] }, { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, 
"datasource": "$datasource", "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] + "custom": { + "fillOpacity": 50, + "showPoints": "never", + "stacking": { + "group": "A", + "mode": "normal" + } + } }, + "fill": 1, "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, "links": [], "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, "span": 4, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster_id=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-read\", operation=\"Shipper.Query\"}[$__rate_interval])) by (le,pod)) * 1e3", - "format": "time_series", - "interval": "1m", - "intervalFactor": 2, + "expr": "histogram_quantile(0.99,\n sum(\n rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster_id=\"$cluster_id\", cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-read\", operation=\"Shipper.Query\"}[$__rate_interval])\n ) by (pod, le)\n )\n", + "instant": false, "legendFormat": "__auto", - "refId": "A", - "step": 10 + "range": true, + "refId": "A" } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "Per Pod Latency (p99)", - "type": "timeseries" + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": 
"time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] } ], "repeat": null, @@ -706,9 +572,7 @@ "schemaVersion": 14, "style": "dark", "tags": [ - "owner:team-atlas", - "topic:observability", - "component:loki" + "loki" ], "templating": { "list": [ @@ -718,7 +582,7 @@ "value": "default" }, "hide": 0, - "label": "Data source", + "label": "Data Source", "name": "datasource", "options": [], "query": "prometheus", @@ -739,6 +603,24 @@ "multi": false, "name": "cluster", "options": [], + "query": "label_values(loki_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Kube cluster", + "multi": false, + "name": "cluster_id", + "options": [], "query": "label_values(loki_build_info, cluster_id)", "refresh": 1, "regex": "", @@ -762,7 +644,7 @@ "multi": false, "name": "namespace", "options": [], - "query": "label_values(loki_build_info{cluster_id=~\"$cluster\"}, namespace)", + "query": "label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)", "refresh": 1, "regex": "", "sort": 2, diff --git a/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-retention.json b/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-retention.json index a9f0fbc0..718abf7f 100644 --- a/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-retention.json +++ b/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-retention.json @@ -13,9 +13,7 @@ "includeVars": true, "keepTime": true, "tags": [ - "owner:team-atlas", - "topic:observability", - "component:loki" + "loki" ], 
"targetBlank": false, "title": "Loki Dashboards", @@ -29,256 +27,280 @@ "height": "250px", "panels": [ { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - } - ] - }, + "fill": 1, "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, "links": [], - "options": { - "legend": { - "showLegend": true + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "request", + "color": "#FFC000", + "fill": 0 }, - "tooltip": { - "mode": "single", - "sort": "none" + { + "alias": "limit", + "color": "#E02F44", + "fill": 0 } - }, + ], + "spaceLength": 10, "span": 4, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", 
container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"}[$__rate_interval]))", "format": "time_series", + "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null + "legendLink": null, + "step": 10 }, { - "expr": "min(kube_pod_container_resource_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\", resource=\"cpu\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\", resource=\"cpu\"} > 0)", "format": "time_series", + "intervalFactor": 2, "legendFormat": "request", - "legendLink": null + "legendLink": null, + "step": 10 }, { - "expr": "min(container_spec_cpu_quota{cluster_id=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"} / container_spec_cpu_period{cluster_id=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"})", + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"})", "format": "time_series", + "intervalFactor": 2, "legendFormat": "limit", - "legendLink": null + "legendLink": null, + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "CPU", "tooltip": { "sort": 2 }, - "type": "timeseries" + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] }, { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + 
"dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - } - ] - }, + "fill": 1, "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, "links": [], - "options": { - "legend": { - "showLegend": true + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "request", + "color": "#FFC000", + "fill": 0 }, - "tooltip": { - "mode": "single", - "sort": "none" + { + "alias": "limit", + "color": "#E02F44", + "fill": 0 } - }, + ], + "spaceLength": 10, "span": 4, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"})", + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"})", "format": "time_series", + "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null + "legendLink": null, + "step": 10 
}, { - "expr": "min(kube_pod_container_resource_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\", resource=\"memory\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\", resource=\"memory\"} > 0)", "format": "time_series", + "intervalFactor": 2, "legendFormat": "request", - "legendLink": null + "legendLink": null, + "step": 10 }, { - "expr": "min(container_spec_memory_limit_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-read.*\"} > 0)", "format": "time_series", + "intervalFactor": 2, "legendFormat": "limit", - "legendLink": null + "legendLink": null, + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "Memory (workingset)", "tooltip": { "sort": 2 }, - "type": "timeseries" + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] }, { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "bytes" - }, - "overrides": [] - }, + "fill": 1, "id": 3, - "links": [], - "options": { - "legend": 
{ - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, "span": 4, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster_id=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-read\"})", + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-read\"})", "format": "time_series", + "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null + "legendLink": null, + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "Memory (go heap inuse)", "tooltip": { "sort": 2 }, - "type": "timeseries" + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] } ], "repeat": null, @@ -353,12 +375,12 @@ "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, - "span": 6, + "span": 4, "stack": false, "steppedLine": false, "targets": [ { - "expr": "loki_boltdb_shipper_compact_tables_operation_last_successful_run_timestamp_seconds{cluster_id=~\"$cluster\", namespace=~\"$namespace\"} * 1e3", + "expr": "loki_boltdb_shipper_compact_tables_operation_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"} * 1e3", "format": "time_series", "instant": true, "refId": "A" @@ -367,7 +389,7 @@ "thresholds": [], "timeFrom": 
null, "timeShift": null, - "title": "Last Compact Tables Operation Success", + "title": "Last Compact and Mark Operation Success", "tooltip": { "shared": true, "sort": 2, @@ -401,164 +423,163 @@ ] }, { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "s" - }, - "overrides": [] - }, + "fill": 1, "id": 5, - "links": [], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, - "span": 6, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "loki_boltdb_shipper_compact_tables_operation_duration_seconds{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}", + "expr": "loki_boltdb_shipper_compact_tables_operation_duration_seconds{cluster=~\"$cluster\", namespace=~\"$namespace\"}", "format": "time_series", + "intervalFactor": 2, "legendFormat": "duration", - "legendLink": null + "legendLink": null, + "step": 10 } ], - "title": "Compact Tables Operations Duration", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Compaction", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": 
"line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "short" - }, - "overrides": [] + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Compact and Mark Operations Duration", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" }, - "id": 6, - "links": [], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] }, - "span": 6, - "targets": [ + "yaxes": [ { - "expr": "sum(increase(loki_compactor_skipped_compacting_locked_table_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}[$__range]))", - "format": "time_series", - "legendFormat": "{{table_name}}", - "legendLink": null + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false } - ], - "title": "Number of times Tables were skipped during Compaction", - "type": "timeseries" + ] }, { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "short" - }, - "overrides": [] + "fill": 1, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, - "id": 7, + "lines": true, + "linewidth": 1, "links": [], - "options": { - "legend": { - "showLegend": 
true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "sum by (status)(rate(loki_boltdb_shipper_compact_tables_operation_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", + "expr": "sum by (status)(rate(loki_boltdb_shipper_compact_tables_operation_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", "format": "time_series", + "intervalFactor": 2, "legendFormat": "{{success}}", - "legendLink": null + "legendLink": null, + "step": 10 } ], - "title": "Compact Tables Operations Per Status", - "type": "timeseries" + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Compact and Mark Operations Per Status", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "", + "title": "Compact and Mark", "titleSize": "h6" }, { @@ -571,27 +592,8 @@ "dashLength": 10, "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "color": { - "fixedColor": "blue", - "mode": "fixed" - }, - "custom": {}, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "dateTimeFromNow" - } - }, - "fill": 1, - "id": 8, + "fill": 10, + "id": 7, "legend": { "avg": false, "current": false, @@ -602,24 +604,9 @@ 
"values": false }, "lines": true, - "linewidth": 1, + "linewidth": 0, "links": [], "nullPointMode": "null as zero", - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, "percentage": false, "pointradius": 5, "points": false, @@ -627,26 +614,28 @@ "seriesOverrides": [], "spaceLength": 10, "span": 4, - "stack": false, + "stack": true, "steppedLine": false, "targets": [ { - "expr": "loki_compactor_apply_retention_last_successful_run_timestamp_seconds{cluster_id=~\"$cluster\", namespace=~\"$namespace\"} * 1e3", + "expr": "count by(action)(loki_boltdb_shipper_retention_marker_table_processed_total{cluster=~\"$cluster\", namespace=~\"$namespace\"})", "format": "time_series", - "instant": true, - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{action}}", + "legendLink": null, + "step": 10 } ], "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Last Mark Operation Success", + "title": "Processed Tables Per Action", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, - "type": "stat", + "type": "graph", "xaxis": { "buckets": null, "mode": "time", @@ -674,251 +663,156 @@ ] }, { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "s" - }, - "overrides": [] + "fill": 10, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, - "id": 9, + "lines": true, + "linewidth": 0, "links": [], - "options": { - 
"legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, "span": 4, + "stack": true, + "steppedLine": false, "targets": [ { - "expr": "loki_compactor_apply_retention_operation_duration_seconds{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}", + "expr": "count by(table,action)(loki_boltdb_shipper_retention_marker_table_processed_total{cluster=~\"$cluster\", namespace=~\"$namespace\" , action=~\"modified|deleted\"})", "format": "time_series", - "legendFormat": "duration", - "legendLink": null + "intervalFactor": 2, + "legendFormat": "{{table}}-{{action}}", + "legendLink": null, + "step": 10 } ], - "title": "Mark Operations Duration", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "short" - }, - "overrides": [] + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Modified Tables", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" }, - "id": 10, - "links": [], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] }, - "span": 4, - "targets": [ + "yaxes": [ { - "expr": "sum by (status)(rate(loki_compactor_apply_retention_operation_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{success}}", - "legendLink": null + "format": "short", + "label": null, + 
"logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false } - ], - "title": "Mark Operations Per Status", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Retention", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + ] + }, { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "short" - }, - "overrides": [] + "fill": 10, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, - "id": 11, + "lines": true, + "linewidth": 0, "links": [], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, "span": 4, + "stack": true, + "steppedLine": false, "targets": [ { - "expr": "count by(action)(loki_boltdb_shipper_retention_marker_table_processed_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\"})", + "expr": "sum by (table)(rate(loki_boltdb_shipper_retention_marker_count_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) >0", "format": "time_series", - "legendFormat": "{{action}}", - "legendLink": null + "intervalFactor": 2, + "legendFormat": "{{table}}", + "legendLink": null, + "step": 10 } ], - "title": "Processed Tables Per Action", - "type": 
"timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "short" - }, - "overrides": [] + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Marks Creation Rate Per Table", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" }, - "id": 12, - "links": [], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] }, - "span": 4, - "targets": [ + "yaxes": [ { - "expr": "count by(table,action)(loki_boltdb_shipper_retention_marker_table_processed_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\" , action=~\"modified|deleted\"})", - "format": "time_series", - "legendFormat": "{{table}}-{{action}}", - "legendLink": null - } - ], - "title": "Modified Tables", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "short" - }, - "overrides": [] - }, - "id": 13, - "links": [], - "options": { - "legend": { - "showLegend": true + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ { - "expr": "sum by (table)(rate(loki_boltdb_shipper_retention_marker_count_total{cluster_id=~\"$cluster\", 
namespace=~\"$namespace\"}[$__rate_interval])) >0", - "format": "time_series", - "legendFormat": "{{table}}", - "legendLink": null + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false } - ], - "title": "Marks Creation Rate Per Table", - "type": "timeseries" + ] } ], "repeat": null, @@ -933,113 +827,154 @@ "height": "250px", "panels": [ { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "short" - }, - "overrides": [] - }, + "fill": 1, "format": "short", - "id": 14, - "links": [], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, "span": 6, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "sum (increase(loki_boltdb_shipper_retention_marker_count_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}[24h]))", + "expr": "sum (increase(loki_boltdb_shipper_retention_marker_count_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[24h]))", "format": "time_series", "instant": true, + "intervalFactor": 2, "refId": "A" } ], "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, "title": "Marked Chunks (24h)", - "type": "singlestat" + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": 
"singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] }, { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "short" - }, - "overrides": [] + "fill": 1, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, - "id": 15, + "lines": true, + "linewidth": 1, "links": [], "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, "span": 6, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", + "intervalFactor": 2, "legendFormat": "99th Percentile", - "refId": "A" + "refId": "A", + "step": 10 }, { - "expr": "histogram_quantile(0.50, 
sum(rate(loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", + "intervalFactor": 2, "legendFormat": "50th Percentile", - "refId": "B" + "refId": "B", + "step": 10 }, { - "expr": "sum(rate(loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_sum{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_count{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", + "expr": "sum(rate(loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_retention_marker_table_processed_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", "format": "time_series", + "intervalFactor": 2, "legendFormat": "Average", - "refId": "C" + "refId": "C", + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "Mark Table Latency", - "type": "timeseries", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, "yaxes": [ { "format": "ms", @@ -1072,113 +1007,154 @@ "height": "250px", "panels": [ { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - 
"stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "short" - }, - "overrides": [] - }, + "fill": 1, "format": "short", - "id": 16, - "links": [], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, "span": 6, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "sum (increase(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}[24h]))", + "expr": "sum (increase(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\"}[24h]))", "format": "time_series", "instant": true, + "intervalFactor": 2, "refId": "A" } ], "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, "title": "Delete Chunks (24h)", - "type": "singlestat" + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] }, { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - 
"spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "short" - }, - "overrides": [] + "fill": 1, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, - "id": 17, + "lines": true, + "linewidth": 1, "links": [], "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, "span": 6, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", + "intervalFactor": 2, "legendFormat": "99th Percentile", - "refId": "A" + "refId": "A", + "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", + "intervalFactor": 2, "legendFormat": "50th Percentile", - "refId": "B" + "refId": "B", + "step": 10 }, { - "expr": "sum(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_sum{cluster_id=~\"$cluster\", 
namespace=~\"$namespace\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", + "expr": "sum(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", "format": "time_series", + "intervalFactor": 2, "legendFormat": "Average", - "refId": "C" + "refId": "C", + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "Delete Latency", - "type": "timeseries", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, "yaxes": [ { "format": "ms", @@ -1211,145 +1187,232 @@ "height": "250px", "panels": [ { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "s" - }, - "overrides": [] + "fill": 1, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, - "id": 18, + "lines": true, + "linewidth": 1, "links": [], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + 
"spaceLength": 10, "span": 4, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "time() - (loki_boltdb_shipper_retention_sweeper_marker_file_processing_current_time{cluster_id=~\"$cluster\", namespace=~\"$namespace\"} > 0)", + "expr": "time() - (loki_boltdb_shipper_retention_sweeper_marker_file_processing_current_time{cluster=~\"$cluster\", namespace=~\"$namespace\"} > 0)", "format": "time_series", + "intervalFactor": 2, "legendFormat": "lag", - "legendLink": null + "legendLink": null, + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "Sweeper Lag", - "type": "timeseries" + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] }, { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "short" - }, - "overrides": [] + "fill": 1, + "id": 15, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, - "id": 19, + "lines": true, + "linewidth": 1, "links": [], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + 
"spaceLength": 10, "span": 4, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "sum(loki_boltdb_shipper_retention_sweeper_marker_files_current{cluster_id=~\"$cluster\", namespace=~\"$namespace\"})", + "expr": "sum(loki_boltdb_shipper_retention_sweeper_marker_files_current{cluster=~\"$cluster\", namespace=~\"$namespace\"})", "format": "time_series", + "intervalFactor": 2, "legendFormat": "count", - "legendLink": null + "legendLink": null, + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "Marks Files to Process", - "type": "timeseries" + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] }, { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "short" - }, - "overrides": [] + "fill": 1, + "id": 16, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, - "id": 20, + "lines": true, + "linewidth": 1, "links": [], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, "span": 4, + "stack": 
false, + "steppedLine": false, "targets": [ { - "expr": "sum by (status)(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", + "expr": "sum by (status)(rate(loki_boltdb_shipper_retention_sweeper_chunk_deleted_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", "format": "time_series", + "intervalFactor": 2, "legendFormat": "{{status}}", - "legendLink": null + "legendLink": null, + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "Delete Rate Per Status", - "type": "timeseries" + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] } ], "repeat": null, @@ -1365,11 +1428,11 @@ "panels": [ { "datasource": "$loki_datasource", - "id": 21, + "id": 17, "span": 12, "targets": [ { - "expr": "{cluster_id=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-read\"}", + "expr": "{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-read\"}", "refId": "A" } ], @@ -1388,9 +1451,7 @@ "schemaVersion": 14, "style": "dark", "tags": [ - "owner:team-atlas", - "topic:observability", - "component:loki" + "loki" ], "templating": { "list": [ @@ -1400,7 +1461,7 @@ "value": "default" }, "hide": 0, - "label": "Data source", + "label": "Data Source", "name": "datasource", "options": [], "query": "prometheus", @@ -1421,7 +1482,7 @@ "multi": false, "name": "cluster", "options": [], - "query": "label_values(loki_build_info, cluster_id)", + "query": "label_values(loki_build_info, cluster)", "refresh": 1, "regex": "", "sort": 2, @@ 
-1444,7 +1505,7 @@ "multi": false, "name": "namespace", "options": [], - "query": "label_values(loki_build_info{cluster_id=~\"$cluster\"}, namespace)", + "query": "label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)", "refresh": 1, "regex": "", "sort": 2, @@ -1497,6 +1558,6 @@ }, "timezone": "utc", "title": "Loki / Retention", - "uid": "loki-retention", + "uid": "retention", "version": 0 } diff --git a/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-writes-resources.json b/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-writes-resources.json index c3f25f4e..a48f6b63 100644 --- a/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-writes-resources.json +++ b/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-writes-resources.json @@ -13,9 +13,7 @@ "includeVars": true, "keepTime": true, "tags": [ - "owner:team-atlas", - "topic:observability", - "component:loki" + "loki" ], "targetBlank": false, "title": "Loki Dashboards", @@ -29,447 +27,589 @@ "collapsed": false, "panels": [ { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "short" - }, - "overrides": [] - }, + "fill": 1, "gridPos": {}, "id": 1, - "links": [], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, 
+ "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (loki_ingester_memory_streams{cluster_id=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"})", + "expr": "sum by(pod) (loki_ingester_memory_streams{cluster_id=\"$cluster_id\", cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"})", "format": "time_series", + "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null + "legendLink": null, + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "In-memory streams", "tooltip": { "sort": 2 }, - "type": "timeseries" + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] }, { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - } - ] - }, + "fill": 1, "gridPos": {}, 
"id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, "links": [], - "options": { - "legend": { - "showLegend": true + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "request", + "color": "#FFC000", + "fill": 0 }, - "tooltip": { - "mode": "single", - "sort": "none" + { + "alias": "limit", + "color": "#E02F44", + "fill": 0 } - }, + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster_id=~\"$cluster_id\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"}[$__rate_interval]))", "format": "time_series", + "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null + "legendLink": null, + "step": 10 }, { - "expr": "min(kube_pod_container_resource_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\", resource=\"cpu\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster_id=~\"$cluster_id\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\", resource=\"cpu\"} > 0)", "format": "time_series", + "intervalFactor": 2, "legendFormat": "request", - "legendLink": null + "legendLink": null, + "step": 10 }, { - "expr": "min(container_spec_cpu_quota{cluster_id=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"} / container_spec_cpu_period{cluster_id=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", 
pod=~\"(loki|enterprise-logs)-write.*\"})", + "expr": "min(container_spec_cpu_quota{cluster_id=~\"$cluster_id\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"} / container_spec_cpu_period{cluster_id=~\"$cluster_id\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"})", "format": "time_series", + "intervalFactor": 2, "legendFormat": "limit", - "legendLink": null + "legendLink": null, + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "CPU", "tooltip": { "sort": 2 }, - "type": "timeseries" + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] }, { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - } - ] - }, + "fill": 1, "gridPos": {}, "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + 
"show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, "links": [], - "options": { - "legend": { - "showLegend": true + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "request", + "color": "#FFC000", + "fill": 0 }, - "tooltip": { - "mode": "single", - "sort": "none" + { + "alias": "limit", + "color": "#E02F44", + "fill": 0 } - }, + ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"})", + "expr": "max by(pod) (container_memory_working_set_bytes{cluster_id=~\"$cluster_id\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"})", "format": "time_series", + "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null + "legendLink": null, + "step": 10 }, { - "expr": "min(kube_pod_container_resource_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\", resource=\"memory\"} > 0)", + "expr": "min(kube_pod_container_resource_requests{cluster_id=~\"$cluster_id\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\", resource=\"memory\"} > 0)", "format": "time_series", + "intervalFactor": 2, "legendFormat": "request", - "legendLink": null + "legendLink": null, + "step": 10 }, { - "expr": "min(container_spec_memory_limit_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster_id=~\"$cluster_id\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\"} > 0)", "format": "time_series", + "intervalFactor": 2, "legendFormat": 
"limit", - "legendLink": null + "legendLink": null, + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "Memory (workingset)", "tooltip": { "sort": 2 }, - "type": "timeseries" + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] }, { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "bytes" - }, - "overrides": [] - }, + "fill": 1, "gridPos": {}, "id": 4, - "links": [], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster_id=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"})", + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster_id=~\"$cluster_id\", job=~\"($namespace)/(loki|enterprise-logs)-write\"})", "format": "time_series", + "intervalFactor": 2, "legendFormat": "{{pod}}", - "legendLink": null + "legendLink": null, + "step": 10 
} ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "Memory (go heap inuse)", "tooltip": { "sort": 2 }, - "type": "timeseries" + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] }, { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "Bps" - }, - "overrides": [] - }, + "fill": 10, "gridPos": {}, "id": 5, - "links": [], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, + "lines": true, + "linewidth": 0, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, "targets": [ { - "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", + "expr": "sum by(instance, pod, device) 
(rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster_id=~\"$cluster_id\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", "format": "time_series", + "intervalFactor": 2, "legendFormat": "{{pod}} - {{device}}", - "legendLink": null + "legendLink": null, + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "Disk Writes", - "type": "timeseries" + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] }, { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "Bps" - }, - "overrides": [] - }, + "fill": 10, "gridPos": {}, "id": 6, - "links": [], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, + "lines": true, + "linewidth": 0, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + 
"spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, "targets": [ { - "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", + "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster_id=~\"$cluster_id\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki|enterprise-logs)-write.*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", "format": "time_series", + "intervalFactor": 2, "legendFormat": "{{pod}} - {{device}}", - "legendLink": null + "legendLink": null, + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "Disk Reads", - "type": "timeseries" + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] }, { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "percentunit" - 
}, - "overrides": [] - }, + "fill": 1, "gridPos": {}, "id": 7, - "links": [], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\"} / kubelet_volume_stats_capacity_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{cluster_id=~\"$cluster\", namespace=~\"$namespace\",label_name=~\"(loki|enterprise-logs)-write.*\"})", + "expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster_id=~\"$cluster_id\", namespace=~\"$namespace\"} / kubelet_volume_stats_capacity_bytes{cluster_id=~\"$cluster_id\", namespace=~\"$namespace\"}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{cluster_id=~\"$cluster_id\", namespace=~\"$namespace\",label_name=~\"(loki|enterprise-logs)-write.*\"})", "format": "time_series", + "intervalFactor": 2, "legendFormat": "{{persistentvolumeclaim}}", - "legendLink": null + "legendLink": null, + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "Disk Space Utilization", - "type": "timeseries" + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true 
+ }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] } ], "repeat": null, @@ -484,9 +624,7 @@ "schemaVersion": 14, "style": "dark", "tags": [ - "owner:team-atlas", - "topic:observability", - "component:loki" + "loki" ], "templating": { "list": [ @@ -496,7 +634,7 @@ "value": "default" }, "hide": 0, - "label": "Data source", + "label": "Data Source", "name": "datasource", "options": [], "query": "prometheus", @@ -504,6 +642,24 @@ "regex": "", "type": "datasource" }, + { + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Kube cluster", + "multi": false, + "name": "cluster_id", + "options": [], + "query": "label_values(loki_build_info, cluster_id)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, { "allValue": null, "current": { @@ -517,7 +673,7 @@ "multi": false, "name": "cluster", "options": [], - "query": "label_values(loki_build_info, cluster_id)", + "query": "label_values(loki_build_info, cluster)", "refresh": 1, "regex": "", "sort": 2, @@ -540,7 +696,7 @@ "multi": false, "name": "namespace", "options": [], - "query": "label_values(loki_build_info{cluster_id=~\"$cluster\"}, namespace)", + "query": "label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)", "refresh": 1, "regex": "", "sort": 2, diff --git a/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-writes.json b/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-writes.json index 0e48dc59..0965c18e 100644 --- a/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-writes.json +++ b/helm/dashboards/charts/private_dashboards_al/dashboards/shared/private/loki-writes.json @@ -13,9 +13,7 @@ "includeVars": true, "keepTime": true, "tags": [ - "owner:team-atlas", - "topic:observability", - "component:loki" + "loki" ], "targetBlank": false, "title": 
"Loki Dashboards", @@ -35,259 +33,67 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", - "OK": "#7EB26D", - "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" }, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" 
- } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, "fill": 10, "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, "linewidth": 0, "links": [], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, "span": 6, "stack": true, + "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster_id=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"api_prom_push|loki_api_v1_push|/httpgrpc.HTTP/Handle\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_request_duration_seconds_count{cluster_id=\"$cluster_id\", cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"api_prom_push|loki_api_v1_push|/httpgrpc.HTTP/Handle\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", "format": "time_series", + "intervalFactor": 2, "legendFormat": "{{status}}", - "refId": "A" + "refId": "A", + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "QPS", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - 
"stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" }, - "id": 2, - "links": [], - "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] }, - "span": 6, - "targets": [ - { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_id_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"api_prom_push|loki_api_v1_push|/httpgrpc.HTTP/Handle\"})) * 1e3", - "format": "time_series", - "legendFormat": "99th percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_id_job_route:loki_request_duration_seconds_bucket:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"api_prom_push|loki_api_v1_push|/httpgrpc.HTTP/Handle\"})) * 1e3", - "format": "time_series", - "legendFormat": "50th percentile", - "refId": "B" - }, - { - "expr": "1e3 * sum(cluster_id_job_route:loki_request_duration_seconds_sum:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"api_prom_push|loki_api_v1_push|/httpgrpc.HTTP/Handle\"}) / sum(cluster_id_job_route:loki_request_duration_seconds_count:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\", route=~\"api_prom_push|loki_api_v1_push|/httpgrpc.HTTP/Handle\"})", - "format": "time_series", - "legendFormat": "Average", - "refId": "C" - } - ], - "title": "Latency", - "type": "timeseries", "yaxes": [ { - "format": "ms", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -303,119 +109,86 @@ "show": false } ] - } - ], - "repeat": 
null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Write Path", - "titleSize": "h6" - }, - { - "collapse": false, - "height": "250px", - "panels": [ + }, { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "short" - }, - "overrides": [] + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, - "id": 3, + "lines": true, + "linewidth": 1, "links": [], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, "span": 6, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "sum (rate(loki_distributor_structured_metadata_bytes_received_total{cluster_id=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\",}[$__rate_interval])) / sum(rate(loki_distributor_bytes_received_total{cluster_id=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\",}[$__rate_interval]))", + "expr": "histogram_quantile(0.99, sum by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"})) * 1e3", "format": "time_series", - "legendFormat": "bytes", - "legendLink": null - } - ], - "title": "Per Total Received Bytes", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - 
"lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "short" + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A", + "step": 10 }, - "overrides": [] - }, - "id": 4, - "links": [], - "options": { - "legend": { - "showLegend": true + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_job:loki_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"})) * 1e3", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B", + "step": 10 }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 6, - "stack": true, - "targets": [ { - "expr": "sum by (tenant) (rate(loki_distributor_structured_metadata_bytes_received_total{cluster_id=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\",}[$__rate_interval])) / ignoring(tenant) group_left sum(rate(loki_distributor_structured_metadata_bytes_received_total{cluster_id=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\",}[$__rate_interval]))", + "expr": "1e3 * sum(cluster_job:loki_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"}) / sum(cluster_job:loki_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/(loki|enterprise-logs)-write\"})", "format": "time_series", - "legendFormat": "{{tenant}}", - "legendLink": null + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C", + "step": 10 } ], - "title": "Per Tenant", - "type": "timeseries", + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": 
[] + }, "yaxes": [ { - "format": "short", + "format": "ms", "label": null, "logBase": 1, - "max": 1, + "max": null, "min": 0, "show": true }, @@ -423,7 +196,7 @@ "format": "short", "label": null, "logBase": 1, - "max": 1, + "max": null, "min": null, "show": false } @@ -448,256 +221,156 @@ "3xx": "#6ED0E0", "4xx": "#EF843C", "5xx": "#E24D42", - "OK": "#7EB26D", - "cancel": "#A9A9A9", "error": "#E24D42", "success": "#7EB26D" }, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cancel" - }, - "properties": [ - { - 
"id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "error" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - } - ] - }, "fill": 10, - "id": 5, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, "linewidth": 0, "links": [], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, "span": 6, "stack": true, + "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster_id=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\", operation=\"WRITE\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster_id=\"$cluster_id\", cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\", operation=\"WRITE\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", "format": "time_series", + "intervalFactor": 2, "legendFormat": "{{status}}", - "refId": "A" + "refId": "A", + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "QPS", - "type": "timeseries" + "tooltip": { + 
"shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] }, { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] + "fill": 1, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false }, - "id": 6, + "lines": true, + "linewidth": 1, "links": [], "nullPointMode": "null as zero", - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, "span": 6, + "stack": false, + "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster_id=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\", operation=\"WRITE\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster_id=\"$cluster_id\", cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\", operation=\"WRITE\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", + "intervalFactor": 2, "legendFormat": "99th Percentile", - 
"refId": "A" + "refId": "A", + "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster_id=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\", operation=\"WRITE\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(loki_boltdb_shipper_request_duration_seconds_bucket{cluster_id=\"$cluster_id\", cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\", operation=\"WRITE\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", + "intervalFactor": 2, "legendFormat": "50th Percentile", - "refId": "B" + "refId": "B", + "step": 10 }, { - "expr": "sum(rate(loki_boltdb_shipper_request_duration_seconds_sum{cluster_id=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\", operation=\"WRITE\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster_id=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\", operation=\"WRITE\"}[$__rate_interval]))", + "expr": "sum(rate(loki_boltdb_shipper_request_duration_seconds_sum{cluster_id=\"$cluster_id\", cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\", operation=\"WRITE\"}[$__rate_interval])) * 1e3 / sum(rate(loki_boltdb_shipper_request_duration_seconds_count{cluster_id=\"$cluster_id\", cluster=~\"$cluster\",job=~\"($namespace)/(loki|enterprise-logs)-write\", operation=\"WRITE\"}[$__rate_interval]))", "format": "time_series", + "intervalFactor": 2, "legendFormat": "Average", - "refId": "C" + "refId": "C", + "step": 10 } ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, "title": "Latency", - "type": "timeseries", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, "yaxes": [ { "format": "ms", @@ -729,9 +402,7 @@ "schemaVersion": 14, "style": "dark", "tags": [ - 
"owner:team-atlas", - "topic:observability", - "component:loki" + "loki" ], "templating": { "list": [ @@ -741,7 +412,7 @@ "value": "default" }, "hide": 0, - "label": "Data source", + "label": "Data Source", "name": "datasource", "options": [], "query": "prometheus", @@ -749,6 +420,24 @@ "regex": "", "type": "datasource" }, + { + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Kube cluster", + "multi": false, + "name": "cluster_id", + "options": [], + "query": "label_values(loki_build_info, cluster_id)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, { "allValue": null, "current": { @@ -762,7 +451,7 @@ "multi": false, "name": "cluster", "options": [], - "query": "label_values(loki_build_info, cluster_id)", + "query": "label_values(loki_build_info, cluster)", "refresh": 1, "regex": "", "sort": 2, @@ -785,7 +474,7 @@ "multi": false, "name": "namespace", "options": [], - "query": "label_values(loki_build_info{cluster_id=~\"$cluster\"}, namespace)", + "query": "label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)", "refresh": 1, "regex": "", "sort": 2, diff --git a/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-alertmanager-resources.json b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-alertmanager-resources.json new file mode 100644 index 00000000..147b8dd7 --- /dev/null +++ b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-alertmanager-resources.json @@ -0,0 +1,701 @@ +{ + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "owner:team-atlas", + 
"topic:observability", + "component:mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 1, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"} / container_spec_cpu_period{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": 
"min(kube_pod_container_resource_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\",resource=\"cpu\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "CPU", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 2, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster_id=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"alertmanager\",resource=\"memory\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "id": 3, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"alertmanager\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Alertmanager", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "id": 4, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster_id=~\"$cluster\", 
namespace=~\"$namespace\",pod=~\"(.*mimir-)?alertmanager.*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Receive bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "id": 5, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?alertmanager.*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Transmit bandwidth", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Network", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "id": 6, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n 
)\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster_id=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"alertmanager\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", + "format": "time_series", + "legendFormat": "{{pod}} - {{device}}", + "legendLink": null + } + ], + "title": "Disk writes", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "id": 7, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster_id=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"alertmanager\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", + "format": "time_series", + "legendFormat": "{{pod}} - {{device}}", + "legendLink": null + } + ], + "title": "Disk reads", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Disk", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + 
"pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "id": 8, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 12, + "targets": [ + { + "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster_id=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(alertmanager).*\"\n }\n)\n", + "format": "time_series", + "legendFormat": "{{persistentvolumeclaim}}", + "legendLink": null + } + ], + "title": "Disk space utilization", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "owner:team-atlas", + "topic:observability", + "component:mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".*", + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [], + "query": "label_values(cortex_build_info, cluster_id)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + 
"value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [], + "query": "label_values(cortex_build_info{cluster_id=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Alertmanager resources", + "uid": "mimir-a6883fb22799ac74479c7db872451092", + "version": 0 +} diff --git a/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-alertmanager.json b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-alertmanager.json new file mode 100644 index 00000000..4292cc72 --- /dev/null +++ b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-alertmanager.json @@ -0,0 +1,2448 @@ +{ + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "owner:team-atlas", + "topic:observability", + "component:mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "100px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "short", + "id": 1, + "legend": { + "avg": 
false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(cluster_id_job_pod:cortex_alertmanager_alerts:sum{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Total alerts", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "short", + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(cluster_id_job_pod:cortex_alertmanager_silences:sum{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + 
"thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Total silences", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "format": "short", + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max(cortex_alertmanager_tenants_discovered{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Tenants", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Headlines", + 
"titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + 
"fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 4, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "QPS", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "id": 5, + "links": [], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_id_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"})) * 1e3", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_id_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"})) * 1e3", 
+ "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B" + }, + { + "expr": "1e3 * sum(cluster_id_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"}) / sum(cluster_id_job_route:cortex_request_duration_seconds_count:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", route=~\"/alertmanagerpb.Alertmanager/HandleRequest\"})", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Alertmanager Distributor", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 6, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 12, + "targets": [ + { + "expr": "sum(cluster_id_job:cortex_alertmanager_alerts_received_total:rate5m{cluster_id=~\"$cluster\", 
job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n-\nsum(cluster_id_job:cortex_alertmanager_alerts_invalid_total:rate5m{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(cluster_id_job:cortex_alertmanager_alerts_invalid_total:rate5m{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "APS", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Alerts received", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 7, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 12, + "targets": [ + { + "expr": "cortex_alertmanager_dispatcher_aggregation_groups{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "per pod Active Aggregation Groups", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Alerts grouping", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + 
"defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 8, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(cluster_id_job_integration:cortex_alertmanager_notifications_total:rate5m{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n-\nsum(cluster_id_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(cluster_id_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "NPS", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + 
"unit": "short" + }, + "overrides": [] + }, + "id": 9, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "(\nsum(cluster_id_job_integration:cortex_alertmanager_notifications_total:rate5m{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}) by(integration)\n-\nsum(cluster_id_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}) by(integration)\n) > 0\nor on () vector(0)\n", + "format": "time_series", + "legendFormat": "success - {{ integration }}", + "legendLink": null + }, + { + "expr": "sum(cluster_id_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}) by(integration)", + "format": "time_series", + "legendFormat": "failed - {{ integration }}", + "legendLink": null + } + ], + "title": "NPS by integration", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "id": 10, + "links": [], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_alertmanager_notification_latency_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + 
"legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_alertmanager_notification_latency_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_alertmanager_notification_latency_seconds_sum{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_alertmanager_notification_latency_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Alert notifications", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "id": 11, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster_id=~\"$cluster\", 
namespace=~\"$namespace\",component=\"alertmanager-storage\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{operation}}", + "legendLink": null + } + ], + "title": "Operations / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "max": 1, + "min": 0, + "noValue": "0", + "unit": "percentunit" + } + }, + "id": 12, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\"}[$__rate_interval])) >= 0", + "format": "time_series", + "legendFormat": "{{operation}}", + "legendLink": null + } + ], + "title": "Error rate", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "id": 13, + "links": [], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, 
+ { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"attributes\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Attributes", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "id": 14, + "links": [], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "format": 
"time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"exists\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Exists", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Alertmanager Configuration Object Store (Alertmanager accesses)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "id": 15, + "links": [], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + 
} + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Get", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "id": 16, + "links": [], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": 
"single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"get_range\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: GetRange", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "id": 17, + "links": [], + "nullPointMode": "null as zero", + "options": { + 
"legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"upload\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Upload", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "id": 18, + "links": [], + 
"nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"alertmanager-storage\",operation=\"delete\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency of op: Delete", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + 
"showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 19, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(pod) (cortex_alertmanager_tenants_owned{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Per pod tenants", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 20, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (cluster_id_job_pod:cortex_alertmanager_alerts:sum{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Per pod alerts", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 21, + "links": [], + 
"options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (cluster_id_job_pod:cortex_alertmanager_silences:sum{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Per pod silences", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Replication", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 22, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(rate(cortex_alertmanager_sync_configs_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_alertmanager_sync_configs_failed_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": 
"successful", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_alertmanager_sync_configs_failed_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "Syncs/sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 23, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(reason) (rate(cortex_alertmanager_sync_configs_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{reason}}", + "legendLink": null + } + ], + "title": "Syncs/sec (by reason)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 24, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum (rate(cortex_alertmanager_ring_check_errors_total{cluster_id=~\"$cluster\", 
job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "errors", + "legendLink": null + } + ], + "title": "Ring check errors/sec", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Tenant configuration sync", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 25, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(outcome) (rate(cortex_alertmanager_state_initial_sync_completed_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "interval": "1m", + "legendFormat": "{{outcome}}", + "legendLink": null + } + ], + "title": "Initial syncs /sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "id": 26, + "links": [], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": 
"histogram_quantile(0.99, sum(rate(cortex_alertmanager_state_initial_sync_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "1m", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_alertmanager_state_initial_sync_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "interval": "1m", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_alertmanager_state_initial_sync_duration_seconds_sum{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_alertmanager_state_initial_sync_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "interval": "1m", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Initial sync duration", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + 
"mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 27, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(rate(cortex_alertmanager_state_fetch_replica_state_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_alertmanager_state_fetch_replica_state_failed_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "interval": "1m", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_alertmanager_state_fetch_replica_state_failed_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "interval": "1m", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "Fetch state from other alertmanagers /sec", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Sharding initial state sync", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + 
"fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 28, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(cluster_id_job:cortex_alertmanager_state_replication_total:rate5m{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n-\nsum(cluster_id_job:cortex_alertmanager_state_replication_failed_total:rate5m{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(cluster_id_job:cortex_alertmanager_state_replication_failed_total:rate5m{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "Replicate state to other alertmanagers /sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 29, + "links": 
[], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(cluster_id_job:cortex_alertmanager_partial_state_merges_total:rate5m{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n-\nsum(cluster_id_job:cortex_alertmanager_partial_state_merges_failed_total:rate5m{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(cluster_id_job:cortex_alertmanager_partial_state_merges_failed_total:rate5m{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "Merge state from other alertmanagers /sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 30, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(rate(cortex_alertmanager_state_persist_total{cluster_id=~\"$cluster\", 
job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_alertmanager_state_persist_failed_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_alertmanager_state_persist_failed_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "Persist state to remote storage /sec", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Sharding runtime state sync", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "owner:team-atlas", + "topic:observability", + "component:mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [], + "query": "label_values(cortex_build_info, cluster_id)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "namespace", + "multi": true, + "name": "namespace", + "options": [], + "query": 
"label_values(cortex_build_info{cluster_id=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Alertmanager", + "uid": "mimir-b0d38d318bbddd80476246d4930f9e55", + "version": 0 +} diff --git a/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-compactor-resources.json b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-compactor-resources.json index d031a90b..ae7e7221 100644 --- a/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-compactor-resources.json +++ b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-compactor-resources.json @@ -21,7 +21,9 @@ "includeVars": true, "keepTime": true, "tags": [ - "mimir" + "owner:team-atlas", + "topic:observability", + "component:mimir" ], "targetBlank": false, "title": "Mimir dashboards", @@ -124,19 +126,19 @@ "span": 6, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})", + "expr": 
"min(container_spec_cpu_quota{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"} / container_spec_cpu_period{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})", "format": "time_series", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\",resource=\"cpu\"})", + "expr": "min(kube_pod_container_resource_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\",resource=\"cpu\"})", "format": "time_series", "legendFormat": "request", "legendLink": null @@ -184,7 +186,7 @@ "span": 6, "targets": [ { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})", + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -295,19 +297,19 @@ "span": 6, "targets": [ { - "expr": "max by(pod) (container_memory_rss{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})", + "expr": "max by(pod) (container_memory_rss{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"} > 0)", "format": "time_series", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\",resource=\"memory\"})", + "expr": "min(kube_pod_container_resource_requests{cluster_id=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"compactor\",resource=\"memory\"})", "format": "time_series", "legendFormat": "request", "legendLink": null @@ -406,19 +408,19 @@ "span": 6, "targets": [ { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})", + "expr": "max by(pod) (container_memory_working_set_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\"} > 0)", "format": "time_series", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\",resource=\"memory\"})", + "expr": "min(kube_pod_container_resource_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"compactor\",resource=\"memory\"})", "format": "time_series", "legendFormat": "request", "legendLink": null @@ -478,7 +480,7 @@ "span": 6, "targets": [ { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?compactor.*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?compactor.*\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -526,7 +528,7 @@ "span": 6, "targets": [ { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?compactor.*\"}[$__rate_interval]))", + "expr": "sum by(pod) 
(rate(container_network_transmit_bytes_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?compactor.*\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -586,7 +588,7 @@ "span": 4, "targets": [ { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"compactor\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", + "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster_id=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"compactor\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", "format": "time_series", "legendFormat": "{{pod}} - {{device}}", "legendLink": null @@ -634,7 +636,7 @@ "span": 4, "targets": [ { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"compactor\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", + "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster_id=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"compactor\",\n 
device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", "format": "time_series", "legendFormat": "{{pod}} - {{device}}", "legendLink": null @@ -685,7 +687,7 @@ "span": 4, "targets": [ { - "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(compactor).*\"\n }\n)\n", + "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster_id=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(compactor).*\"\n }\n)\n", "format": "time_series", "legendFormat": "{{persistentvolumeclaim}}", "legendLink": null @@ -706,7 +708,9 @@ "schemaVersion": 14, "style": "dark", "tags": [ - "mimir" + "owner:team-atlas", + "topic:observability", + "component:mimir" ], "templating": { "list": [ @@ -725,7 +729,7 @@ "type": "datasource" }, { - "allValue": ".*", + "allValue": ".+", "current": { "selected": true, "text": "All", @@ -738,7 +742,7 @@ "multi": true, "name": "cluster", "options": [], - "query": "label_values(cortex_build_info, cluster)", + "query": "label_values(cortex_build_info, cluster_id)", "refresh": 1, "regex": "", "sort": 1, @@ -762,7 +766,7 @@ "multi": true, "name": "namespace", "options": [], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "query": "label_values(cortex_build_info{cluster_id=~\"$cluster\"}, namespace)", "refresh": 1, "regex": "", "sort": 1, @@ -805,6 +809,6 @@ }, "timezone": "utc", "title": "Mimir / Compactor resources", - 
"uid": "09a5c49e9cdb2f2b24c6d184574a07fd", + "uid": "mimir-09a5c49e9cdb2f2b24c6d184574a07fd", "version": 0 } diff --git a/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-compactor.json b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-compactor.json index d306b768..08d97b65 100644 --- a/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-compactor.json +++ b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-compactor.json @@ -21,7 +21,9 @@ "includeVars": true, "keepTime": true, "tags": [ - "mimir" + "owner:team-atlas", + "topic:observability", + "component:mimir" ], "targetBlank": false, "title": "Mimir dashboards", @@ -120,19 +122,19 @@ "span": 3, "targets": [ { - "expr": "sum(rate(cortex_compactor_runs_started_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_compactor_runs_started_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "started", "legendLink": null }, { - "expr": "sum(rate(cortex_compactor_runs_completed_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_compactor_runs_completed_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "completed", "legendLink": null }, { - "expr": "sum(rate(cortex_compactor_runs_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_compactor_runs_failed_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", "format": "time_series", 
"legendFormat": "failed", "legendLink": null @@ -165,7 +167,7 @@ "span": 3, "targets": [ { - "expr": "(\n cortex_compactor_tenants_processing_succeeded{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"} +\n cortex_compactor_tenants_processing_failed{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"} +\n cortex_compactor_tenants_skipped{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}\n)\n/\ncortex_compactor_tenants_discovered{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"} > 0\n", + "expr": "(\n cortex_compactor_tenants_processing_succeeded{cluster_id=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"} +\n cortex_compactor_tenants_processing_failed{cluster_id=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"} +\n cortex_compactor_tenants_skipped{cluster_id=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}\n)\n/\ncortex_compactor_tenants_discovered{cluster_id=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"} > 0\n", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -291,7 +293,7 @@ "steppedLine": false, "targets": [ { - "expr": "max by(pod)\n(\n (time() * (max_over_time(cortex_compactor_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[1h]) !=bool 0))\n -\n max_over_time(cortex_compactor_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[1h])\n)\n", + "expr": "max by(pod)\n(\n (time() * (max_over_time(cortex_compactor_last_successful_run_timestamp_seconds{cluster_id=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[1h]) !=bool 0))\n -\n 
max_over_time(cortex_compactor_last_successful_run_timestamp_seconds{cluster_id=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[1h])\n)\n", "format": "table", "instant": true, "interval": "", @@ -501,7 +503,7 @@ "span": 3, "targets": [ { - "expr": "max by(pod)\n(\n (time() * (max_over_time(cortex_compactor_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[1h]) !=bool 0))\n -\n max_over_time(cortex_compactor_last_successful_run_timestamp_seconds{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[1h])\n)\n", + "expr": "max by(pod)\n(\n (time() * (max_over_time(cortex_compactor_last_successful_run_timestamp_seconds{cluster_id=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[1h]) !=bool 0))\n -\n max_over_time(cortex_compactor_last_successful_run_timestamp_seconds{cluster_id=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[1h])\n)\n", "format": "table", "instant": true, "legendFormat": "Last run", @@ -583,6 +585,55 @@ "collapse": false, "height": "250px", "panels": [ + { + "datasource": "$datasource", + "description": "### Estimated Compaction Jobs\nEstimated number of compaction jobs based on latest version of bucket index. Ingesters upload new blocks every 2 hours (shortly after 01:00 UTC, 03:00 UTC, 05:00 UTC, etc.),\nand compactors should process all of them within 2h interval. If this graph regularly goes to zero (or close to zero) in 2 hour intervals, then compaction works as designed.\n\nMetric with number of compaction jobs is computed from blocks in bucket index, which is updated regularly. Metric doesn't change between bucket index updates, even if\nthere were compaction jobs finished in this time. When computing compaction jobs, only jobs that can be executed at given moment are counted. 
There can be more\njobs, but if they are blocked, they are not counted in the metric. For example if there is a split compaction job pending for some time range, no merge job\ncovering the same time range can run. In this case only split compaction job is counted toward the metric, but merge job isn't.\n\nIn other words, computed number of compaction jobs is the minimum number of compaction jobs based on latest version of bucket index.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 5, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(cortex_bucket_index_estimated_compaction_jobs{cluster_id=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}) and (sum(rate(cortex_bucket_index_estimated_compaction_jobs_errors_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) == 0)", + "format": "time_series", + "legendFormat": "Jobs", + "legendLink": null + } + ], + "title": "Estimated Compaction Jobs", + "type": "timeseries" + }, { "datasource": "$datasource", "description": "### TSDB compactions / sec\nRate of TSDB compactions. 
Single TSDB compaction takes one or more input blocks and produces one or more (during \"split\" phase) output blocks.\n\n", @@ -609,7 +660,7 @@ }, "overrides": [] }, - "id": 5, + "id": 6, "links": [], "options": { "legend": { @@ -620,10 +671,10 @@ "sort": "none" } }, - "span": 6, + "span": 4, "targets": [ { - "expr": "sum(rate(prometheus_tsdb_compactions_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "expr": "sum(rate(prometheus_tsdb_compactions_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "compactions", "legendLink": null @@ -658,7 +709,7 @@ }, "overrides": [] }, - "id": 6, + "id": 7, "links": [], "nullPointMode": "null as zero", "options": { @@ -670,22 +721,22 @@ "sort": "none" } }, - "span": 6, + "span": 4, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(prometheus_tsdb_compaction_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(prometheus_tsdb_compaction_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(prometheus_tsdb_compaction_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(prometheus_tsdb_compaction_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": 
"sum(rate(prometheus_tsdb_compaction_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(prometheus_tsdb_compaction_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "expr": "sum(rate(prometheus_tsdb_compaction_duration_seconds_sum{cluster_id=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(prometheus_tsdb_compaction_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -749,7 +800,7 @@ }, "overrides": [] }, - "id": 7, + "id": 8, "links": [], "options": { "legend": { @@ -763,7 +814,7 @@ "span": 6, "targets": [ { - "expr": "avg(max by(user) (cortex_bucket_blocks_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}))", + "expr": "avg(max by(user) (cortex_bucket_blocks_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}))", "format": "time_series", "legendFormat": "avg", "legendLink": null @@ -798,7 +849,7 @@ }, "overrides": [] }, - "id": 8, + "id": 9, "links": [], "options": { "legend": { @@ -812,7 +863,7 @@ "span": 6, "targets": [ { - "expr": "topk(10, max by(user) (cortex_bucket_blocks_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}))", + "expr": "topk(10, max by(user) (cortex_bucket_blocks_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}))", "format": "time_series", "legendFormat": "{{user}}", "legendLink": null @@ -858,7 +909,7 @@ }, "overrides": [] }, - "id": 9, + "id": 10, "links": [], "options": { "legend": { @@ -872,7 +923,7 @@ "span": 6, "targets": [ { - "expr": 
"sum(rate(cortex_compactor_blocks_marked_for_deletion_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", + "expr": "sum(rate(cortex_compactor_blocks_marked_for_deletion_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", "format": "time_series", "legendFormat": "blocks", "legendLink": null @@ -937,7 +988,7 @@ } ] }, - "id": 10, + "id": 11, "links": [], "options": { "legend": { @@ -951,13 +1002,13 @@ "span": 6, "targets": [ { - "expr": "sum(rate(cortex_compactor_blocks_cleaned_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", + "expr": "sum(rate(cortex_compactor_blocks_cleaned_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", "format": "time_series", "legendFormat": "successful", "legendLink": null }, { - "expr": "sum(rate(cortex_compactor_block_cleanup_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", + "expr": "sum(rate(cortex_compactor_block_cleanup_failures_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", "format": "time_series", "legendFormat": "failed", "legendLink": null @@ -1034,7 +1085,7 @@ } ] }, - "id": 11, + "id": 12, "links": [], "options": { "legend": { @@ -1048,13 +1099,13 @@ "span": 6, "targets": [ { - "expr": "sum(rate(cortex_compactor_meta_syncs_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_compactor_meta_sync_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", + "expr": "sum(rate(cortex_compactor_meta_syncs_total{cluster_id=~\"$cluster\", 
job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_compactor_meta_sync_failures_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", "format": "time_series", "legendFormat": "successful", "legendLink": null }, { - "expr": "sum(rate(cortex_compactor_meta_sync_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", + "expr": "sum(rate(cortex_compactor_meta_sync_failures_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", "format": "time_series", "legendFormat": "failed", "legendLink": null @@ -1088,7 +1139,7 @@ }, "overrides": [] }, - "id": 12, + "id": 13, "links": [], "nullPointMode": "null as zero", "options": { @@ -1103,19 +1154,19 @@ "span": 6, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(cortex_compactor_meta_sync_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(cortex_compactor_meta_sync_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(cortex_compactor_meta_sync_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(cortex_compactor_meta_sync_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, 
{ - "expr": "sum(rate(cortex_compactor_meta_sync_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_compactor_meta_sync_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_compactor_meta_sync_duration_seconds_sum{cluster_id=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_compactor_meta_sync_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -1179,7 +1230,7 @@ }, "overrides": [] }, - "id": 13, + "id": 14, "links": [], "options": { "legend": { @@ -1193,7 +1244,7 @@ "span": 3, "targets": [ { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\"}[$__rate_interval]))", + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{operation}}", "legendLink": null @@ -1212,7 +1263,7 @@ "unit": "percentunit" } }, - "id": 14, + "id": 15, "links": [], "options": { "legend": { @@ -1226,7 +1277,7 @@ "span": 3, "targets": [ { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\"}[$__rate_interval])) >= 0", + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster_id=~\"$cluster\", 
namespace=~\"$namespace\",component=\"compactor\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\"}[$__rate_interval])) >= 0", "format": "time_series", "legendFormat": "{{operation}}", "legendLink": null @@ -1260,7 +1311,7 @@ }, "overrides": [] }, - "id": 15, + "id": 16, "links": [], "nullPointMode": "null as zero", "options": { @@ -1275,19 +1326,19 @@ "span": 3, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"attributes\"}[$__rate_interval]))", + 
"expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"attributes\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -1339,7 +1390,7 @@ }, "overrides": [] }, - "id": 16, + "id": 17, "links": [], "nullPointMode": "null as zero", "options": { @@ -1354,19 +1405,19 @@ "span": 3, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"exists\"}[$__rate_interval])) * 1e3 / 
sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"exists\"}[$__rate_interval]))", + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"exists\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -1430,7 +1481,7 @@ }, "overrides": [] }, - "id": 17, + "id": 18, "links": [], "nullPointMode": "null as zero", "options": { @@ -1445,19 +1496,19 @@ "span": 3, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", 
namespace=~\"$namespace\",component=\"compactor\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get\"}[$__rate_interval]))", + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -1509,7 +1560,7 @@ }, "overrides": [] }, - "id": 18, + "id": 19, "links": [], "nullPointMode": "null as zero", "options": { @@ -1524,19 +1575,19 @@ "span": 3, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": 
"B" }, { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get_range\"}[$__rate_interval]))", + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"get_range\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -1588,7 +1639,7 @@ }, "overrides": [] }, - "id": 19, + "id": 20, "links": [], "nullPointMode": "null as zero", "options": { @@ -1603,19 +1654,19 @@ "span": 3, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", 
namespace=~\"$namespace\",component=\"compactor\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"upload\"}[$__rate_interval]))", + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"upload\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -1667,7 +1718,7 @@ }, "overrides": [] }, - "id": 20, + "id": 21, "links": [], "nullPointMode": "null as zero", "options": { @@ -1682,19 +1733,19 @@ "span": 3, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "expr": 
"histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"delete\"}[$__rate_interval]))", + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"compactor\",operation=\"delete\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -1894,7 +1945,7 @@ } ] }, - "id": 21, + "id": 22, "links": [], "options": { "legend": { @@ -1908,7 +1959,7 @@ "span": 6, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": 
"time_series", "legendFormat": "{{status}}", "refId": "A" @@ -1942,7 +1993,7 @@ }, "overrides": [] }, - "id": 22, + "id": 23, "links": [], "nullPointMode": "null as zero", "options": { @@ -1957,19 +2008,19 @@ "span": 6, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster_id=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster_id=~\"$cluster\", 
job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", kv_name=~\".+\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -2008,7 +2059,9 @@ "schemaVersion": 14, "style": "dark", "tags": [ - "mimir" + "owner:team-atlas", + "topic:observability", + "component:mimir" ], "templating": { "list": [ @@ -2040,7 +2093,7 @@ "multi": true, "name": "cluster", "options": [], - "query": "label_values(cortex_build_info, cluster)", + "query": "label_values(cortex_build_info, cluster_id)", "refresh": 1, "regex": "", "sort": 1, @@ -2064,7 +2117,7 @@ "multi": true, "name": "namespace", "options": [], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "query": "label_values(cortex_build_info{cluster_id=~\"$cluster\"}, namespace)", "refresh": 1, "regex": "", "sort": 1, @@ -2107,6 +2160,6 @@ }, "timezone": "utc", "title": "Mimir / Compactor", - "uid": "1b3443aea86db629e6efdb7d05c53823", + "uid": "mimir-1b3443aea86db629e6efdb7d05c53823", "version": 0 } diff --git a/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-config.json b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-config.json new file mode 100644 index 00000000..f7affef2 --- /dev/null +++ b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-config.json @@ -0,0 +1,262 @@ +{ + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "owner:team-atlas", + "topic:observability", + "component:mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + 
"panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "instances" + }, + "overrides": [] + }, + "id": 1, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 12, + "targets": [ + { + "expr": "count(cortex_config_hash{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}) by (sha256)", + "format": "time_series", + "legendFormat": "sha256:{{sha256}}", + "legendLink": null + } + ], + "title": "Startup config file hashes", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Startup config file", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "instances" + }, + "overrides": [] + }, + "id": 2, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 12, + "targets": [ + { + "expr": "count(cortex_runtime_config_hash{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}) by (sha256)", + "format": "time_series", + "legendFormat": "sha256:{{sha256}}", + "legendLink": null + } + ], + "title": "Runtime config file hashes", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, 
+ "title": "Runtime config file", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "owner:team-atlas", + "topic:observability", + "component:mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [], + "query": "label_values(cortex_build_info, cluster_id)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "namespace", + "multi": true, + "name": "namespace", + "options": [], + "query": "label_values(cortex_build_info{cluster_id=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Config", + "uid": "mimir-5d9d0b4724c0f80d68467088ec61e003", + "version": 0 +} diff --git a/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-object-store.json b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-object-store.json new file 
mode 100644 index 00000000..e2fcf6f1 --- /dev/null +++ b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-object-store.json @@ -0,0 +1,826 @@ +{ + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "owner:team-atlas", + "topic:observability", + "component:mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "id": 1, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(component) (rate(thanos_objstore_bucket_operations_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{component}}", + "legendLink": null + } + ], + "title": "RPS / component", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "max": 1, + "min": 0, + "noValue": "0", + "unit": "percentunit" + } + }, + "id": 2, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(component) 
(rate(thanos_objstore_bucket_operation_failures_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) / sum by(component) (rate(thanos_objstore_bucket_operations_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) >= 0", + "format": "time_series", + "legendFormat": "{{component}}", + "legendLink": null + } + ], + "title": "Error rate / component", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Components", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "id": 3, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{operation}}", + "legendLink": null + } + ], + "title": "RPS / operation", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "max": 1, + "min": 0, + "noValue": "0", + "unit": "percentunit" + } + }, + "id": 4, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) / sum by(operation) 
(rate(thanos_objstore_bucket_operations_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval])) >= 0", + "format": "time_series", + "legendFormat": "{{operation}}", + "legendLink": null + } + ], + "title": "Error rate / operation", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Operations", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "id": 5, + "links": [], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster_id=~\"$cluster\", namespace=~\"$namespace\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster_id=~\"$cluster\", 
namespace=~\"$namespace\",operation=\"get\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Op: Get", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "id": 6, + "links": [], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster_id=~\"$cluster\", namespace=~\"$namespace\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster_id=~\"$cluster\", namespace=~\"$namespace\",operation=\"get_range\"}[$__rate_interval]))", + "format": "time_series", + 
"legendFormat": "Average", + "refId": "C" + } + ], + "title": "Op: GetRange", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "id": 7, + "links": [], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster_id=~\"$cluster\", namespace=~\"$namespace\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster_id=~\"$cluster\", namespace=~\"$namespace\",operation=\"exists\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Op: Exists", + "type": "timeseries", + 
"yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "id": 8, + "links": [], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster_id=~\"$cluster\", namespace=~\"$namespace\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster_id=~\"$cluster\", namespace=~\"$namespace\",operation=\"attributes\"}[$__rate_interval]))", + "format": "time_series", + 
"legendFormat": "Average", + "refId": "C" + } + ], + "title": "Op: Attributes", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "id": 9, + "links": [], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster_id=~\"$cluster\", namespace=~\"$namespace\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster_id=~\"$cluster\", namespace=~\"$namespace\",operation=\"upload\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Op: Upload", + "type": "timeseries", + 
"yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "id": 10, + "links": [], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster_id=~\"$cluster\", namespace=~\"$namespace\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster_id=~\"$cluster\", namespace=~\"$namespace\",operation=\"delete\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Op: Delete", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true 
+ }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "owner:team-atlas", + "topic:observability", + "component:mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [], + "query": "label_values(cortex_build_info, cluster_id)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "namespace", + "multi": true, + "name": "namespace", + "options": [], + "query": "label_values(cortex_build_info{cluster_id=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Object Store", + "uid": "mimir-e1324ee2a434f4158c00a9ee279d3292", + "version": 0 +} diff --git 
a/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-overrides.json b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-overrides.json new file mode 100644 index 00000000..a4433373 --- /dev/null +++ b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-overrides.json @@ -0,0 +1,270 @@ +{ + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "owner:team-atlas", + "topic:observability", + "component:mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "${datasource}", + "id": 1, + "span": 12, + "targets": [ + { + "expr": "max by(limit_name) (cortex_limits_defaults{cluster_id=~\"$cluster\",namespace=~\"$namespace\"})", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Defaults", + "transformations": [ + { + "id": "labelsToFields", + "options": {} + }, + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "indexByName": { + "Value": 1, + "limit_name": 0 + } + } + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "field": "limit_name" + } + ] + } + } + ], + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "${datasource}", + "id": 2, + "span": 12, + "targets": [ + { + "expr": "max by(user, limit_name) 
(cortex_limits_overrides{cluster_id=~\"$cluster\",namespace=~\"$namespace\",user=~\"${tenant_id}\"})", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Per-tenant overrides", + "transformations": [ + { + "id": "labelsToFields", + "options": { + "mode": "columns", + "valueLabel": "limit_name" + } + }, + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "indexByName": { + "user": 0 + } + } + } + ], + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "owner:team-atlas", + "topic:observability", + "component:mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".*", + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [], + "query": "label_values(cortex_build_info, cluster_id)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [], + "query": "label_values(cortex_build_info{cluster_id=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": true, + "text": ".*", + "value": ".*" 
+ }, + "hide": 0, + "label": "Tenant ID", + "name": "tenant_id", + "options": [ + { + "selected": true, + "text": ".*", + "value": ".*" + } + ], + "query": ".*", + "type": "textbox" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Overrides", + "uid": "mimir-1e2c358600ac53f09faea133f811b5bb", + "version": 0 +} diff --git a/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-overview-networking.json b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-overview-networking.json index 8a7c166f..5fdfd7fd 100644 --- a/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-overview-networking.json +++ b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-overview-networking.json @@ -21,7 +21,9 @@ "includeVars": true, "keepTime": true, "tags": [ - "mimir" + "owner:team-atlas", + "topic:observability", + "component:mimir" ], "targetBlank": false, "title": "Mimir dashboards", @@ -73,7 +75,7 @@ "span": 3, "targets": [ { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -121,7 +123,7 @@ "span": 3, "targets": [ { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", 
namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -172,13 +174,13 @@ "span": 3, "targets": [ { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", + "expr": "avg(cortex_inflight_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", "format": "time_series", "legendFormat": "avg", "legendLink": null }, { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", + "expr": "max(cortex_inflight_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", "format": "time_series", "legendFormat": "highest", "legendLink": null @@ -229,19 +231,19 @@ "span": 3, "targets": [ { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}))", + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}))", "format": "time_series", "legendFormat": "avg", "legendLink": null }, { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}))", + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}))", "format": "time_series", "legendFormat": "highest", 
"legendLink": null }, { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", + "expr": "min(cortex_tcp_connections_limit{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", "format": "time_series", "legendFormat": "limit", "legendLink": null @@ -301,7 +303,7 @@ "span": 3, "targets": [ { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -349,7 +351,7 @@ "span": 3, "targets": [ { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -400,13 +402,13 @@ "span": 3, "targets": [ { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", + "expr": "avg(cortex_inflight_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", 
"format": "time_series", "legendFormat": "avg", "legendLink": null }, { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", + "expr": "max(cortex_inflight_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", "format": "time_series", "legendFormat": "highest", "legendLink": null @@ -457,19 +459,19 @@ "span": 3, "targets": [ { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}))", + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}))", "format": "time_series", "legendFormat": "avg", "legendLink": null }, { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}))", + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}))", "format": "time_series", "legendFormat": "highest", "legendLink": null }, { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", + "expr": "min(cortex_tcp_connections_limit{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", "format": "time_series", "legendFormat": "limit", "legendLink": null @@ -529,7 +531,7 @@ "span": 3, 
"targets": [ { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -577,7 +579,7 @@ "span": 3, "targets": [ { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -628,13 +630,13 @@ "span": 3, "targets": [ { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"})", + "expr": "avg(cortex_inflight_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"})", "format": "time_series", "legendFormat": "avg", "legendLink": null }, { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", 
namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"})", + "expr": "max(cortex_inflight_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"})", "format": "time_series", "legendFormat": "highest", "legendLink": null @@ -685,19 +687,19 @@ "span": 3, "targets": [ { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"}))", + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"}))", "format": "time_series", "legendFormat": "avg", "legendLink": null }, { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"}))", + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"}))", "format": "time_series", "legendFormat": "highest", "legendLink": null }, { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"})", + "expr": "min(cortex_tcp_connections_limit{cluster_id=~\"$cluster\", 
namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"})", "format": "time_series", "legendFormat": "limit", "legendLink": null @@ -718,7 +720,9 @@ "schemaVersion": 14, "style": "dark", "tags": [ - "mimir" + "owner:team-atlas", + "topic:observability", + "component:mimir" ], "templating": { "list": [ @@ -749,7 +753,7 @@ "multi": false, "name": "cluster", "options": [], - "query": "label_values(cortex_build_info, cluster)", + "query": "label_values(cortex_build_info, cluster_id)", "refresh": 1, "regex": "", "sort": 1, @@ -772,7 +776,7 @@ "multi": false, "name": "namespace", "options": [], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "query": "label_values(cortex_build_info{cluster_id=~\"$cluster\"}, namespace)", "refresh": 1, "regex": "", "sort": 1, @@ -815,6 +819,6 @@ }, "timezone": "utc", "title": "Mimir / Overview networking", - "uid": "e15c71d372cc541367a088f10d9fcd92", + "uid": "mimir-e15c71d372cc541367a088f10d9fcd92", "version": 0 } diff --git a/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-overview-resources.json b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-overview-resources.json index 1ef65e1b..c66fa632 100644 --- a/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-overview-resources.json +++ b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-overview-resources.json @@ -1,23 +1,19 @@ { + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": { - "type": "grafana", - "uid": "-- Grafana --" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] + "list": [] }, "editable": true, - 
"fiscalYearStartMonth": 0, + "gnetId": null, "graphTooltip": 1, + "hideControls": false, "links": [ { "asDropdown": true, @@ -25,1569 +21,869 @@ "includeVars": true, "keepTime": true, "tags": [ - "mimir" + "owner:team-atlas", + "topic:observability", + "component:mimir" ], "targetBlank": false, "title": "Mimir dashboards", "type": "dashboards" } ], - "panels": [ + "refresh": "10s", + "rows": [ { - "collapsed": false, - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 16, - "panels": [], - "targets": [ + "collapse": false, + "height": "250px", + "panels": [ { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "refId": "A" - } - ], - "title": "Writes", - "type": "row" - }, - { - "datasource": { - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 1, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 1, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + 
"mode": "single", + "sort": "none" } }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 1 - }, - "id": 1, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "refId": "A" - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 1, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null } - }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "bytes" + ], + "title": "CPU", + "type": "timeseries" }, - "overrides": [] - 
}, - "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 1 - }, - "id": 2, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ { - "datasource": { - "uid": "$datasource" - }, - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "refId": "A" - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "id": 2, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" } }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "bytes" - }, - "overrides": [] - }, - 
"gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 1 - }, - "id": 3, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "span": 4, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "type": "timeseries" }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "targets": [ { - "datasource": { - "uid": "$datasource" + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "id": 3, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } }, - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "refId": "A" + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "type": "timeseries" } ], - "title": "Memory (go heap inuse)", - "type": "timeseries" + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Writes", + "titleSize": "h6" }, { - "collapsed": false, - "datasource": { - "type": "prometheus", - "uid": 
"PBFA97CFB590B2093" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 8 - }, - "id": 17, - "panels": [], - "targets": [ + "collapse": false, + "height": "250px", + "panels": [ { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "refId": "A" - } - ], - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 100, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 0, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "id": 4, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" } }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "Bps" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 9 - }, - "id": 4, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - 
"uid": "$datasource" - }, - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"distributor|ingester|mimir-write\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "hide": true, - "legendFormat": "{{pod}} - {{device}}", - "refId": "Original query" - }, - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": " container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"distributor|ingester|mimir-write\",\n device!~\".*sda.*\"\n }\n", - "format": "time_series", - "hide": false, - "legendFormat": "{{pod}} - {{device}}", - "range": true, - "refId": "A" - } - ], - "title": "Disk writes", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 100, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 0, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" + "span": 4, + "targets": [ + { + "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n 
device\n )\n (\n container_fs_writes_bytes_total{\n cluster_id=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"distributor|ingester|mimir-write\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", + "format": "time_series", + "legendFormat": "{{pod}} - {{device}}", + "legendLink": null } - }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "Bps" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 9 - }, - "id": 5, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"distributor|ingester|mimir-write\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "hide": true, - "legendFormat": "{{pod}} - {{device}}", - "range": true, - "refId": "Original query" + ], + "title": "Disk writes", + "type": "timeseries" }, { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": " container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"distributor|ingester|mimir-write\",\n device!~\".*sda.*\"\n }\n", - "format": "time_series", - "hide": false, - "legendFormat": "{{pod}} - {{device}}", - "range": true, - "refId": "GS query" - } - ], - "title": "Disk reads", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "fieldConfig": { - 
"defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 1, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "id": 5, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" } }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 9 - }, - "id": 6, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n namespace=~\"$namespace\",\n label_name=~\"(distributor|ingester|mimir-write).*\"\n }\n)\n", 
- "format": "time_series", - "hide": true, - "legendFormat": "{{persistentvolumeclaim}}", - "range": true, - "refId": "original query" + "span": 4, + "targets": [ + { + "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster_id=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"distributor|ingester|mimir-write\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", + "format": "time_series", + "legendFormat": "{{pod}} - {{device}}", + "legendLink": null + } + ], + "title": "Disk reads", + "type": "timeseries" }, { - "datasource": { - "uid": "$datasource" + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "id": 6, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } }, - "editorMode": "code", - "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{namespace=~\"$namespace\", persistentvolumeclaim=~\".*(distributor|ingester|mimir-write).*\"} /\n kubelet_volume_stats_capacity_bytes{namespace=~\"$namespace\", persistentvolumeclaim=~\".*(distributor|ingester|mimir-write).*\"}\n)\n", - "format": "time_series", - "hide": false, - "legendFormat": "{{persistentvolumeclaim}}", - "range": true, - "refId": "GS query" + "span": 4, + "targets": [ + { + "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\"} 
/\n kubelet_volume_stats_capacity_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster_id=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(distributor|ingester|mimir-write).*\"\n }\n)\n", + "format": "time_series", + "legendFormat": "{{persistentvolumeclaim}}", + "legendLink": null + } + ], + "title": "Disk space utilization", + "type": "timeseries" } ], - "title": "Disk space utilization", - "type": "timeseries" + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" }, { - "collapsed": false, - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 16 - }, - "id": 18, - "panels": [], - "targets": [ + "collapse": false, + "height": "250px", + "panels": [ { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "refId": "A" - } - ], - "title": "Reads", - "type": "row" - }, - { - "datasource": { - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 1, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + 
"group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 7, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" } }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 17 - }, - "id": 7, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "refId": "A" - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 1, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster_id=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null } - }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "bytes" + ], + "title": "CPU", + "type": "timeseries" }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 17 - }, - "id": 8, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ { - "datasource": { - "uid": "$datasource" - }, - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "refId": "A" - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + 
"group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "id": 8, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" } }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 17 - }, - "id": 9, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "span": 4, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "type": "timeseries" }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "targets": [ { - "datasource": { - "uid": "$datasource" + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "id": 9, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } }, - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "refId": "A" + "span": 4, + "targets": [ + { + "expr": "sum by(pod) 
(go_memstats_heap_inuse_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "type": "timeseries" } ], - "title": "Memory (go heap inuse)", - "type": "timeseries" + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Reads", + "titleSize": "h6" }, { - "collapsed": false, - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 24 - }, - "id": 19, - "panels": [], - "targets": [ + "collapse": false, + "height": "250px", + "panels": [ { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "refId": "A" - } - ], - "title": "Backend", - "type": "row" - }, - { - "datasource": { - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 1, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + 
"overrides": [] + }, + "id": 10, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" } }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 25 - }, - "id": 10, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "refId": "A" - } - ], - "title": "CPU", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 1, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster_id=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null } - }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 25 - }, - "id": 11, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + ], + "title": "CPU", + "type": "timeseries" }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ { - "datasource": { - "uid": "$datasource" - }, - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "refId": "A" - } - ], - "title": "Memory (workingset)", - "type": "timeseries" - }, - { - "datasource": { - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 
1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "id": 11, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" } }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 25 - }, - "id": 12, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "span": 4, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "type": "timeseries" }, - "tooltip": { - "mode": "multi", - "sort": "desc" - } - }, - "targets": [ { - "datasource": { - "uid": "$datasource" + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "id": 12, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } }, - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "refId": "A" + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "type": "timeseries" } ], - "title": "Memory (go heap inuse)", - "type": "timeseries" + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Backend", + "titleSize": "h6" }, { - "collapsed": false, - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 32 - }, - "id": 20, - "panels": [], - "targets": [ + "collapse": false, + "height": "250px", + "panels": [ { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "refId": "A" - } - ], - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 100, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 0, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" + "datasource": 
"$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "id": 13, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" } }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "Bps" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 33 - }, - "id": 13, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "hide": true, - "legendFormat": "{{pod}} - {{device}}", - "refId": "Original query" - }, - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": " container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\",\n device!~\".*sda.*\"\n }\n", - "format": "time_series", - "hide": false, - "legendFormat": 
"{{pod}} - {{device}}", - "range": true, - "refId": "GS query" - } - ], - "title": "Disk writes", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 100, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 0, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" + "span": 4, + "targets": [ + { + "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster_id=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", + "format": "time_series", + "legendFormat": "{{pod}} - {{device}}", + "legendLink": null } - }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "Bps" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 33 - }, - "id": 14, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - 
"editorMode": "code", - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", - "format": "time_series", - "hide": true, - "legendFormat": "{{pod}} - {{device}}", - "range": true, - "refId": "Original query" + ], + "title": "Disk writes", + "type": "timeseries" }, { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": " container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\",\n device!~\".*sda.*\"\n }", - "format": "time_series", - "hide": false, - "legendFormat": "{{pod}} - {{device}}", - "range": true, - "refId": "GS query" - } - ], - "title": "Disk reads", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 1, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" + 
"datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "id": 14, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" } }, - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 33 - }, - "id": 15, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "uid": "$datasource" - }, - "editorMode": "code", - "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{namespace=~\"$namespace\", persistentvolumeclaim=~\".*(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"} /\n kubelet_volume_stats_capacity_bytes{namespace=~\"$namespace\", persistentvolumeclaim=~\".*(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"}\n)", - "format": "time_series", - "hide": false, - "legendFormat": "{{persistentvolumeclaim}}", - "range": true, - "refId": "Original query" + "span": 4, + "targets": [ + { + "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster_id=~\"$cluster\", namespace=~\"$namespace\",\n 
container=~\"query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", + "format": "time_series", + "legendFormat": "{{pod}} - {{device}}", + "legendLink": null + } + ], + "title": "Disk reads", + "type": "timeseries" }, { - "datasource": { - "uid": "$datasource" + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "id": 15, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } }, - "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"\n }\n)\n", - "format": "time_series", - "hide": false, - "legendFormat": "{{persistentvolumeclaim}}", - "refId": "GS query" + "span": 4, + "targets": [ + { + "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster_id=~\"$cluster\", namespace=~\"$namespace\",\n 
label_name=~\"(query-scheduler|ruler-query-scheduler|ruler|store-gateway|compactor|alertmanager|overrides-exporter|mimir-backend).*\"\n }\n)\n", + "format": "time_series", + "legendFormat": "{{persistentvolumeclaim}}", + "legendLink": null + } + ], + "title": "Disk space utilization", + "type": "timeseries" } ], - "title": "Disk space utilization", - "type": "timeseries" + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" } ], - "refresh": "1m", - "schemaVersion": 39, + "schemaVersion": 14, + "style": "dark", "tags": [ - "mimir" + "owner:team-atlas", + "topic:observability", + "component:mimir" ], "templating": { "list": [ { "current": { - "selected": false, "text": "default", "value": "default" }, "hide": 0, - "includeAll": false, "label": "Data source", - "multi": false, "name": "datasource", "options": [], "query": "prometheus", "refresh": 1, "regex": "", - "skipUrlSync": false, "type": "datasource" }, { "allValue": ".*", "current": { - "selected": true, - "text": "All", - "value": "$__all" + "text": "prod", + "value": "prod" }, - "datasource": { - "type": "prometheus", - "uid": "$datasource" - }, - "definition": "", + "datasource": "$datasource", "hide": 0, "includeAll": true, "label": "cluster", "multi": false, "name": "cluster", "options": [], - "query": "label_values(cortex_build_info, cluster)", + "query": "label_values(cortex_build_info, cluster_id)", "refresh": 1, "regex": "", - "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", + "tags": [], "tagsQuery": "", "type": "query", "useTags": false }, { + "allValue": null, "current": { - "selected": true, - "text": "mimir", - "value": "mimir" - }, - "datasource": { - "type": "prometheus", - "uid": "$datasource" + "text": "prod", + "value": "prod" }, - "definition": "", + "datasource": "$datasource", "hide": 0, "includeAll": false, "label": "namespace", "multi": false, "name": "namespace", "options": [], - "query": 
"label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "query": "label_values(cortex_build_info{cluster_id=~\"$cluster\"}, namespace)", "refresh": 1, "regex": "", - "skipUrlSync": false, "sort": 1, "tagValuesQuery": "", + "tags": [], "tagsQuery": "", "type": "query", "useTags": false @@ -1625,7 +921,6 @@ }, "timezone": "utc", "title": "Mimir / Overview resources", - "uid": "mimir-overview-resources", - "version": 1, - "weekStart": "" + "uid": "mimir-a9b92d3c4d1af325d872a9e9a7083d71", + "version": 0 } diff --git a/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-overview.json b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-overview.json index a58095a8..ecd4284f 100644 --- a/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-overview.json +++ b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-overview.json @@ -21,7 +21,9 @@ "includeVars": true, "keepTime": true, "tags": [ - "mimir" + "owner:team-atlas", + "topic:observability", + "component:mimir" ], "targetBlank": false, "title": "Mimir dashboards", @@ -64,7 +66,7 @@ }, { "color": "#E24D42", - "value": 0.050000000000000003 + "value": 0.05 } ] } @@ -81,7 +83,7 @@ "uid": "$datasource" }, "exemplar": false, - "expr": "(\n # gRPC errors are not tracked as 5xx but \"error\".\n sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"5.*|error\"}[$__rate_interval]))\n or\n # Handle the case no failure has been tracked yet.\n vector(0)\n)\n/\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", + "expr": "(\n # gRPC 
errors are not tracked as 5xx but \"error\".\n sum(histogram_count(rate(cortex_request_duration_seconds{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"5.*|error\"}[$__rate_interval])))\n or\n # Handle the case no failure has been tracked yet.\n vector(0)\n)\n/\nsum(histogram_count(rate(cortex_request_duration_seconds{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval])))\n < ($latency_metrics * -Inf)", "instant": false, "legendFormat": "Writes", "range": true @@ -91,7 +93,27 @@ "uid": "$datasource" }, "exemplar": false, - "expr": "(\n sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\",status_code=~\"5.*\"}[$__rate_interval]))\n or\n # Handle the case no failure has been tracked yet.\n vector(0)\n)\n/\nsum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]))\n", + "expr": "(\n # gRPC errors are not tracked as 5xx but \"error\".\n sum(rate(cortex_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"5.*|error\"}[$__rate_interval]))\n or\n # Handle the case no failure has been tracked yet.\n vector(0)\n)\n/\nsum(rate(cortex_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", 
route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n < ($latency_metrics * +Inf)", + "instant": false, + "legendFormat": "Writes", + "range": true + }, + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": false, + "expr": "(\n # gRPC errors are not tracked as 5xx but \"error\".\n sum(histogram_count(rate(cortex_request_duration_seconds{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\",status_code=~\"5.*|error\"}[$__rate_interval])))\n or\n # Handle the case no failure has been tracked yet.\n vector(0)\n)\n/\nsum(histogram_count(rate(cortex_request_duration_seconds{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval])))\n < ($latency_metrics * -Inf)", + "instant": false, + "legendFormat": "Reads", + "range": true + }, + { + "datasource": { + "uid": "$datasource" + }, + "exemplar": false, + "expr": "(\n # gRPC errors are not tracked as 5xx but \"error\".\n sum(rate(cortex_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\",status_code=~\"5.*|error\"}[$__rate_interval]))\n or\n # Handle the case no failure has been tracked yet.\n vector(0)\n)\n/\nsum(rate(cortex_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]))\n < ($latency_metrics * +Inf)", "instant": false, "legendFormat": "Reads", "range": true @@ -101,7 +123,7 @@ "uid": "$datasource" }, "exemplar": false, - "expr": "(\n (\n sum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n +\n # Consider 
missed evaluations as failures.\n sum(rate(cortex_prometheus_rule_group_iterations_missed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n )\n or\n # Handle the case no failure has been tracked yet.\n vector(0)\n)\n/\nsum(rate(cortex_prometheus_rule_evaluations_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", + "expr": "(\n (\n sum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n +\n # Consider missed evaluations as failures.\n sum(rate(cortex_prometheus_rule_group_iterations_missed_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n )\n or\n # Handle the case no failure has been tracked yet.\n vector(0)\n)\n/\nsum(rate(cortex_prometheus_rule_evaluations_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", "instant": false, "legendFormat": "Rule evaluations", "range": true @@ -111,7 +133,7 @@ "uid": "$datasource" }, "exemplar": false, - "expr": "(\n # Failed notifications from ruler to Alertmanager (handling the case the ruler metrics are missing).\n ((sum(rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n) or vector(0))\n +\n # Failed notifications from Alertmanager to receivers (handling the case the alertmanager metrics are missing).\n ((sum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n) or vector(0))\n)\n/\n(\n # Total notifications from ruler to Alertmanager (handling the case the ruler metrics are missing).\n 
((sum(rate(cortex_prometheus_notifications_sent_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n) or vector(0))\n +\n # Total notifications from Alertmanager to receivers (handling the case the alertmanager metrics are missing).\n ((sum(cluster_job_integration:cortex_alertmanager_notifications_total:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n) or vector(0))\n)\n", + "expr": "(\n # Failed notifications from ruler to Alertmanager (handling the case the ruler metrics are missing).\n ((sum(rate(cortex_prometheus_notifications_errors_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n) or vector(0))\n +\n # Failed notifications from Alertmanager to receivers (handling the case the alertmanager metrics are missing).\n ((sum(cluster_id_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n) or vector(0))\n)\n/\n(\n # Total notifications from ruler to Alertmanager (handling the case the ruler metrics are missing).\n ((sum(rate(cortex_prometheus_notifications_sent_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n) or vector(0))\n +\n # Total notifications from Alertmanager to receivers (handling the case the alertmanager metrics are missing).\n ((sum(cluster_id_job_integration:cortex_alertmanager_notifications_total:rate5m{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\"})\n) or vector(0))\n)\n", "instant": false, "legendFormat": "Alerting notifications", "range": true @@ -121,7 +143,7 @@ "uid": "$datasource" }, "exemplar": false, - "expr": "sum(rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", 
namespace=~\"$namespace\"}[$__rate_interval]))\n/\nsum(rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n", + "expr": "sum(rate(thanos_objstore_bucket_operation_failures_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n/\nsum(rate(thanos_objstore_bucket_operations_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n", "instant": false, "legendFormat": "Object storage", "range": true @@ -133,7 +155,7 @@ { "id": 3, "options": { - "alertInstanceLabelFilter": "cluster=~\"$cluster\", namespace=~\"$namespace\"", + "alertInstanceLabelFilter": "cluster_id=~\"$cluster\", namespace=~\"$namespace\"", "alertName": "Mimir", "dashboardAlerts": false, "maxItems": 100, @@ -348,7 +370,7 @@ "span": 3, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "legendFormat": "{{status}}", "refId": "A" @@ -397,19 +419,19 @@ "span": 3, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", 
job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", + "expr": "histogram_quantile(0.99, sum by (le) (cluster_id_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", "format": "time_series", "legendFormat": "99th percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", + "expr": "histogram_quantile(0.50, sum by (le) (cluster_id_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", "format": "time_series", "legendFormat": "50th percentile", "refId": "B" }, { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})", + "expr": "1e3 * sum(cluster_id_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", 
route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}) / sum(cluster_id_job_route:cortex_request_duration_seconds_count:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -457,13 +479,13 @@ "span": 3, "targets": [ { - "expr": "sum(cluster_namespace_job:cortex_distributor_received_samples:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", + "expr": "sum(cluster_id_namespace_job:cortex_distributor_received_samples:rate5m{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", "format": "time_series", "legendFormat": "samples / sec", "legendLink": null }, { - "expr": "sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", + "expr": "sum(cluster_id_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", "format": "time_series", "legendFormat": "exemplars / sec", "legendLink": null @@ -670,7 +692,7 @@ "span": 3, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", 
route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "legendFormat": "{{status}}", "refId": "A" @@ -719,19 +741,19 @@ "span": 3, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", + "expr": "histogram_quantile(0.99, sum by (le) (cluster_id_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", "format": "time_series", "legendFormat": "99th percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", + "expr": "histogram_quantile(0.50, sum by (le) (cluster_id_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", "format": "time_series", "legendFormat": "50th percentile", "refId": "B" }, { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})", + "expr": "1e3 * 
sum(cluster_id_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}) / sum(cluster_id_job_route:cortex_request_duration_seconds_count:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -763,7 +785,217 @@ }, "unit": "reqps" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/.*_api_v1_query($|[^_])/" + }, + "properties": [ + { + "id": "displayName", + "value": "instant queries" + }, + { + "id": "color", + "value": { + "fixedColor": "#429D48", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*_api_v1_query_range($|[^_])/" + }, + "properties": [ + { + "id": "displayName", + "value": "range queries" + }, + { + "id": "color", + "value": { + "fixedColor": "#F1C731", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*_api_v1_labels($|[^_])/" + }, + "properties": [ + { + "id": "displayName", + "value": "\"label names\" queries" + }, + { + "id": "color", + "value": { + "fixedColor": "#2A66CF", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*_api_v1_label_name_values($|[^_])/" + }, + "properties": [ + { + "id": "displayName", + "value": "\"label values\" queries" + }, + { + "id": "color", + "value": { + "fixedColor": "#9E44C1", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*_api_v1_series($|[^_])/" + }, + "properties": [ + { + "id": "displayName", + "value": "series queries" + }, + { + "id": "color", + "value": { + "fixedColor": "#FFAB57", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*_api_v1_read($|[^_])/" + }, + 
"properties": [ + { + "id": "displayName", + "value": "remote read queries" + }, + { + "id": "color", + "value": { + "fixedColor": "#C79424", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*_api_v1_metadata($|[^_])/" + }, + "properties": [ + { + "id": "displayName", + "value": "metadata queries" + }, + { + "id": "color", + "value": { + "fixedColor": "#84D586", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*_api_v1_query_exemplars($|[^_])/" + }, + "properties": [ + { + "id": "displayName", + "value": "exemplar queries" + }, + { + "id": "color", + "value": { + "fixedColor": "#A1C4FC", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*_api_v1_cardinality_active_series($|[^_])/" + }, + "properties": [ + { + "id": "displayName", + "value": "\"active series\" queries" + }, + { + "id": "color", + "value": { + "fixedColor": "#C788DE", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*_api_v1_cardinality_label_names($|[^_])/" + }, + "properties": [ + { + "id": "displayName", + "value": "\"label name cardinality\" queries" + }, + { + "id": "color", + "value": { + "fixedColor": "#3F6833", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/.*_api_v1_cardinality_label_values($|[^_])/" + }, + "properties": [ + { + "id": "displayName", + "value": "\"label value cardinality\" queries" + }, + { + "id": "color", + "value": { + "fixedColor": "#447EBC", + "mode": "fixed" + } + } + ] + } + ] }, "id": 11, "links": [], @@ -776,96 +1008,15 @@ "sort": "none" } }, - "seriesOverrides": [ - { - "alias": "instant queries", - "color": "#429D48" - }, - { - "alias": "range queries", - "color": "#F1C731" - }, - { - "alias": "\"label names\" queries", - "color": "#2A66CF" - }, - { - "alias": "\"label values\" queries", - "color": "#9E44C1" - }, - { - "alias": "series queries", - "color": 
"#FFAB57" - }, - { - "alias": "remote read queries", - "color": "#C79424" - }, - { - "alias": "metadata queries", - "color": "#84D586" - }, - { - "alias": "exemplar queries", - "color": "#A1C4FC" - }, - { - "alias": "other", - "color": "#C788DE" - } - ], "span": 3, "targets": [ { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_query\"}[$__rate_interval]))", + "expr": "sum by (route) (rate(cortex_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)(_api_v1_query|_api_v1_query_range|_api_v1_labels|_api_v1_label_name_values|_api_v1_series|_api_v1_read|_api_v1_metadata|_api_v1_query_exemplars|_api_v1_cardinality_active_series|_api_v1_cardinality_label_names|_api_v1_cardinality_label_values)\"}[$__rate_interval]))", "format": "time_series", - "legendFormat": "instant queries", "legendLink": null }, { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_query_range\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "range queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_labels\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "\"label names\" queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_label_name_values\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "\"label values\" queries", - 
"legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_series\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "series queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_read\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "remote read queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_metadata\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "metadata queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_query_exemplars\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "exemplar queries", - "legendLink": null - }, - { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_.*\",route!~\".*(query|query_range|label.*|series|read|metadata|query_exemplars)\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_request_duration_seconds_count{cluster_id=~\"$cluster\", 
job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_.*\",route!~\"(prometheus|api_prom)(_api_v1_query|_api_v1_query_range|_api_v1_labels|_api_v1_label_name_values|_api_v1_series|_api_v1_read|_api_v1_metadata|_api_v1_query_exemplars|_api_v1_cardinality_active_series|_api_v1_cardinality_label_names|_api_v1_cardinality_label_values)\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "other", "legendLink": null @@ -967,19 +1118,19 @@ "span": 3, "targets": [ { - "expr": "sum(rate(cortex_prometheus_rule_evaluations_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", + "expr": "sum(rate(cortex_prometheus_rule_evaluations_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", "format": "time_series", "legendFormat": "success", "legendLink": null }, { - "expr": "sum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "failed", "legendLink": null }, { - "expr": "sum(rate(cortex_prometheus_rule_group_iterations_missed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_prometheus_rule_group_iterations_missed_total{cluster_id=~\"$cluster\", 
job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "missed", "legendLink": null @@ -1027,7 +1178,7 @@ "span": 3, "targets": [ { - "expr": "sum (rate(cortex_prometheus_rule_evaluation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum (rate(cortex_prometheus_rule_evaluation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", + "expr": "sum (rate(cortex_prometheus_rule_evaluation_duration_seconds_sum{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum (rate(cortex_prometheus_rule_evaluation_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", "format": "time_series", "legendFormat": "average", "legendLink": null @@ -1106,13 +1257,13 @@ "span": 3, "targets": [ { - "expr": "sum(rate(cortex_prometheus_notifications_sent_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n -\nsum(rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", + "expr": "sum(rate(cortex_prometheus_notifications_sent_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n -\nsum(rate(cortex_prometheus_notifications_errors_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", "format": "time_series", "legendFormat": "successful", "legendLink": null }, { - "expr": "sum(rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", + "expr": 
"sum(rate(cortex_prometheus_notifications_errors_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", "format": "time_series", "legendFormat": "failed", "legendLink": null @@ -1214,13 +1365,13 @@ "span": 3, "targets": [ { - "expr": "sum(rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n-\nsum(rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n", + "expr": "sum(rate(thanos_objstore_bucket_operations_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n-\nsum(rate(thanos_objstore_bucket_operation_failures_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n", "format": "time_series", "legendFormat": "successful", "legendLink": null }, { - "expr": "sum(rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n", + "expr": "sum(rate(thanos_objstore_bucket_operation_failures_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))\n", "format": "time_series", "legendFormat": "failed", "legendLink": null @@ -1298,7 +1449,7 @@ "span": 3, "targets": [ { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{operation}}", "legendLink": null @@ -1346,7 +1497,7 @@ "span": 3, "targets": [ { - "expr": "sum(max by(user) (max_over_time(cortex_bucket_blocks_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[15m])))", + "expr": "sum(max by(user) (max_over_time(cortex_bucket_blocks_count{cluster_id=~\"$cluster\", 
job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[15m])))", "format": "time_series", "legendFormat": "blocks", "legendLink": null @@ -1367,7 +1518,9 @@ "schemaVersion": 14, "style": "dark", "tags": [ - "mimir" + "owner:team-atlas", + "topic:observability", + "component:mimir" ], "templating": { "list": [ @@ -1399,7 +1552,7 @@ "multi": true, "name": "cluster", "options": [], - "query": "label_values(cortex_build_info, cluster)", + "query": "label_values(cortex_build_info, cluster_id)", "refresh": 1, "regex": "", "sort": 1, @@ -1423,7 +1576,7 @@ "multi": true, "name": "namespace", "options": [], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "query": "label_values(cortex_build_info{cluster_id=~\"$cluster\"}, namespace)", "refresh": 1, "regex": "", "sort": 1, @@ -1432,6 +1585,35 @@ "tagsQuery": "", "type": "query", "useTags": false + }, + { + "current": { + "selected": true, + "text": "classic", + "value": "1" + }, + "description": "Choose between showing latencies based on low precision classic or high precision native histogram metrics.", + "hide": 0, + "includeAll": false, + "label": "Latency metrics", + "multi": false, + "name": "latency_metrics", + "options": [ + { + "selected": false, + "text": "native", + "value": "-1" + }, + { + "selected": true, + "text": "classic", + "value": "1" + } + ], + "query": "native : -1,classic : 1", + "skipUrlSync": false, + "type": "custom", + "useTags": false } ] }, @@ -1466,6 +1648,6 @@ }, "timezone": "utc", "title": "Mimir / Overview", - "uid": "ffcd83628d7d4b5a03d1cafd159e6c9c", + "uid": "mimir-ffcd83628d7d4b5a03d1cafd159e6c9c", "version": 0 } diff --git a/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-queries.json b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-queries.json new file mode 100644 index 00000000..afe935c6 --- /dev/null +++ 
b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-queries.json @@ -0,0 +1,2539 @@ +{ + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "owner:team-atlas", + "topic:observability", + "component:mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "id": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_query_frontend_queue_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": 
"sum(rate(cortex_query_frontend_queue_duration_seconds_sum{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_query_frontend_queue_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Queue duration", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "id": 2, + "links": [], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_query_frontend_retries_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_query_frontend_retries_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_query_frontend_retries_sum{cluster_id=~\"$cluster\", 
job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_query_frontend_retries_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Retries", + "type": "timeseries", + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 3, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (cortex_query_frontend_queue_length{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Queue length (per pod)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 4, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": 
"single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(user) (cortex_query_frontend_queue_length{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}) > 0", + "format": "time_series", + "legendFormat": "{{user}}", + "legendLink": null + } + ], + "title": "Queue length (per user)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query-frontend", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "id": 5, + "links": [], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / 
sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Queue duration", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 6, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (cortex_query_scheduler_queue_length{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Queue length (per pod)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 7, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(user) 
(cortex_query_scheduler_queue_length{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}) > 0", + "format": "time_series", + "legendFormat": "{{user}}", + "legendLink": null + } + ], + "title": "Queue length (per user)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query-scheduler", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Intervals per query\nThe average number of split queries (partitioned by time) executed a single input query.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 8, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(rate(cortex_frontend_split_queries_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) / sum(rate(cortex_frontend_query_range_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", method=\"split_by_interval_and_results_cache\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "splitting rate", + "legendLink": null + } + ], + "title": "Intervals per query", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": 
"none" + } + }, + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "id": 9, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "# Query the new metric introduced in Mimir 2.10.\n(\n sum by(request_type) (rate(cortex_frontend_query_result_cache_hits_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))\n /\n sum by(request_type) (rate(cortex_frontend_query_result_cache_requests_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))\n)\n# Otherwise fallback to the previous general-purpose metrics.\nor\n(\n label_replace(\n # Query metrics before and after dskit cache refactor.\n sum (\n rate(thanos_cache_memcached_hits_total{name=\"frontend-cache\", cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_hits_total{name=\"frontend-cache\", cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])\n )\n /\n sum (\n rate(thanos_cache_memcached_requests_total{name=~\"frontend-cache\", cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_requests_total{name=~\"frontend-cache\", cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])\n ),\n \"request_type\", \"query_range\", \"\", \"\")\n)\n", + "format": "time_series", + "legendFormat": "{{request_type}}", + "legendLink": null + } + ], + "title": "Query results cache hit ratio", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Query 
results cache skipped\nThe % of queries whose results could not be cached.\nIt is tracked for each split query when the splitting by interval is enabled.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "id": 10, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(rate(cortex_frontend_query_result_cache_skipped_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (reason) /\nignoring (reason) group_left sum(rate(cortex_frontend_query_result_cache_attempted_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "{{reason}}", + "legendLink": null + } + ], + "title": "Query results cache skipped", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query-frontend - query splitting and results cache", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Sharded queries ratio\nThe % of queries that have been successfully rewritten and executed in a shardable way.\nThis panel only takes into account the type of queries that are supported by query sharding (eg. 
range queries).\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "id": 11, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum(rate(cortex_frontend_query_sharding_rewrites_succeeded_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) /\nsum(rate(cortex_frontend_query_sharding_rewrites_attempted_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "sharded queries ratio", + "legendLink": null + } + ], + "title": "Sharded queries ratio", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Number of sharded queries per query\nThe number of sharded queries that have been executed for a single input query. 
It only tracks queries that\nhave been successfully rewritten in a shardable way.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 12, + "links": [], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_frontend_sharded_queries_per_query_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_frontend_sharded_queries_per_query_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_frontend_sharded_queries_per_query_sum{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_frontend_sharded_queries_per_query_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Number of sharded queries per query", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + 
"show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Query-frontend - query sharding", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 13, + "links": [], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_id_job:cortex_ingester_queried_series_bucket:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_id_job:cortex_ingester_queried_series_bucket:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B" + }, + { + "expr": "1 * sum(cluster_id_job:cortex_ingester_queried_series_sum:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}) / sum(cluster_id_job:cortex_ingester_queried_series_count:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Series per query", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + 
"drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 14, + "links": [], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_id_job:cortex_ingester_queried_samples_bucket:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_id_job:cortex_ingester_queried_samples_bucket:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B" + }, + { + "expr": "1 * sum(cluster_id_job:cortex_ingester_queried_samples_sum:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}) / sum(cluster_id_job:cortex_ingester_queried_samples_count:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Samples per query", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 15, + "links": [], + 
"nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_id_job:cortex_ingester_queried_exemplars_bucket:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_id_job:cortex_ingester_queried_exemplars_bucket:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})) * 1", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B" + }, + { + "expr": "1 * sum(cluster_id_job:cortex_ingester_queried_exemplars_sum:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}) / sum(cluster_id_job:cortex_ingester_queried_exemplars_count:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Exemplars per query", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ingester", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 16, + "links": [], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", 
+ "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_querier_storegateway_instances_hit_per_query_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_querier_storegateway_instances_hit_per_query_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_querier_storegateway_instances_hit_per_query_sum{cluster_id=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_querier_storegateway_instances_hit_per_query_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Number of store-gateways hit per query", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 17, + "links": [], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": 
"none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_querier_storegateway_refetches_per_query_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_querier_storegateway_refetches_per_query_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_querier_storegateway_refetches_per_query_sum{cluster_id=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_querier_storegateway_refetches_per_query_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Refetches of missing blocks per query", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "description": "### Consistency checks failed\nRate of queries that had to run with consistency checks and those checks failed. 
A failed consistency check means that some of at least one block which had to be queried wasn't present in any of the store-gateways.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Failure Rate" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 18, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(rate(cortex_querier_blocks_consistency_checks_failed_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) / sum(rate(cortex_querier_blocks_consistency_checks_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Failure Rate", + "legendLink": null + } + ], + "title": "Consistency checks failed", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Rejected queries\nThe proportion of all queries received by queriers that were rejected for some reason.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "id": 19, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + 
"tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by (reason) (rate(cortex_querier_queries_rejected_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) / ignoring (reason) group_left sum(rate(cortex_querier_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_query(_range)?\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{reason}}", + "legendLink": null + } + ], + "title": "Rejected queries", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Querier", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 20, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "max(cortex_bucket_index_loaded{cluster_id=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"})", + "format": "time_series", + "legendFormat": "Max", + "legendLink": null + }, + { + "expr": "min(cortex_bucket_index_loaded{cluster_id=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"})", + "format": "time_series", + "legendFormat": "Min", + "legendLink": null + }, + { + "expr": "avg(cortex_bucket_index_loaded{cluster_id=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"})", + 
"format": "time_series", + "legendFormat": "Average", + "legendLink": null + } + ], + "title": "Bucket indexes loaded (per querier)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 21, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(rate(cortex_bucket_index_loads_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) - sum(rate(cortex_bucket_index_load_failures_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_bucket_index_load_failures_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "Bucket indexes load / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + 
"showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "id": 22, + "links": [], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_bucket_index_load_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_bucket_index_load_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_bucket_index_load_duration_seconds_sum{cluster_id=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_bucket_index_load_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Bucket indexes load latency", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", 
+ "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ops" + }, + "overrides": [] + }, + "id": 23, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(rate(cortex_bucket_store_series_blocks_queried_sum{component=\"store-gateway\",cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "blocks", + "legendLink": null + } + ], + "title": "Blocks queried / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "id": 24, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(data_type) (\n # Exclude \"chunks refetched\".\n rate(cortex_bucket_store_series_data_size_fetched_bytes_sum{component=\"store-gateway\", stage!=\"refetched\", cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])\n)\n", + "format": "time_series", + "legendFormat": "{{data_type}}", + "legendLink": null + } + ], + "title": "Data fetched / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + 
"drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "id": 25, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(data_type) (\n # Exclude \"chunks processed\" to only count \"chunks returned\", other than postings and series.\n rate(cortex_bucket_store_series_data_size_touched_bytes_sum{component=\"store-gateway\", stage!=\"processed\",cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])\n)\n", + "format": "time_series", + "legendFormat": "{{data_type}}", + "legendLink": null + } + ], + "title": "Data touched / sec", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Store-gateway", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "s" + }, + "overrides": [] + }, + "id": 26, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(stage) (rate(cortex_bucket_store_series_request_stage_duration_seconds_sum{cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n/\nsum by(stage) 
(rate(cortex_bucket_store_series_request_stage_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "{{stage}}", + "legendLink": null + } + ], + "title": "Series request average latency", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "s" + }, + "overrides": [] + }, + "id": 27, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by(stage, le) (rate(cortex_bucket_store_series_request_stage_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])))\n", + "format": "time_series", + "legendFormat": "{{stage}}", + "legendLink": null + } + ], + "title": "Series request 99th percentile latency", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Series batch preloading efficiency\nThis panel shows the % of time reduced by preloading, for Series() requests which have been\nsplit to 2+ batches. 
If a Series() request is served within a single batch, then preloading\nis not triggered, and thus not counted in this measurement.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "id": 28, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "# Clamping min to 0 because if preloading not useful at all, then the actual value we get is\n# slightly negative because of the small overhead introduced by preloading.\nclamp_min(1 - (\n sum(rate(cortex_bucket_store_series_batch_preloading_wait_duration_seconds_sum{cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\n sum(rate(cortex_bucket_store_series_batch_preloading_load_duration_seconds_sum{cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n), 0)\n", + "format": "time_series", + "legendFormat": "% of time reduced by preloading", + "legendLink": null + } + ], + "title": "Series batch preloading efficiency", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Blocks currently owned\nThis panel shows the number of blocks owned by each store-gateway replica.\nFor each owned block, the store-gateway keeps its index-header on disk, and\neventually loaded in memory (if index-header lazy loading is disabled, or lazy loading\nis 
enabled and the index-header was loaded).\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 29, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "cortex_bucket_store_blocks_loaded{component=\"store-gateway\",cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Blocks currently owned", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 30, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(rate(cortex_bucket_store_block_loads_total{component=\"store-gateway\",cluster_id=~\"$cluster\", 
job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) - sum(rate(cortex_bucket_store_block_load_failures_total{component=\"store-gateway\",cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_bucket_store_block_load_failures_total{component=\"store-gateway\",cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "Blocks loaded / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 31, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(rate(cortex_bucket_store_block_drops_total{component=\"store-gateway\",cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) - sum(rate(cortex_bucket_store_block_drop_failures_total{component=\"store-gateway\",cluster_id=~\"$cluster\", 
job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_bucket_store_block_drop_failures_total{component=\"store-gateway\",cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "Blocks dropped / sec", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 32, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "cortex_bucket_store_indexheader_lazy_load_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"} - cortex_bucket_store_indexheader_lazy_unload_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Lazy loaded index-headers", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": 
"A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "id": 33, + "links": [], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_bucket_store_indexheader_lazy_load_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_bucket_store_indexheader_lazy_load_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_bucket_store_indexheader_lazy_load_duration_seconds_sum{cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_bucket_store_indexheader_lazy_load_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Index-header lazy load duration", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "description": "### Index-header lazy load gate latency\nTime spent waiting for a turn to load an index header. 
This time is not included in \"Index-header lazy load duration.\"\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "id": 34, + "links": [], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_bucket_stores_gate_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",gate=\"index_header\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_bucket_stores_gate_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",gate=\"index_header\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_bucket_stores_gate_duration_seconds_sum{cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",gate=\"index_header\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_bucket_stores_gate_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",gate=\"index_header\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Index-header lazy load gate latency", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + 
"format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "id": 35, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(rate(cortex_bucket_store_series_hash_cache_hits_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n/\nsum(rate(cortex_bucket_store_series_hash_cache_requests_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "hit ratio", + "legendLink": null + } + ], + "title": "Series hash cache hit ratio", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "id": 36, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + 
"expr": "sum(rate(thanos_store_index_cache_hits_total{item_type=\"ExpandedPostings\",cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n/\nsum(rate(thanos_store_index_cache_requests_total{item_type=\"ExpandedPostings\",cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "hit ratio", + "legendLink": null + } + ], + "title": "ExpandedPostings cache hit ratio", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "id": 37, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(rate(cortex_cache_memory_hits_total{name=\"chunks-attributes-cache\",cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n/\nsum(rate(cortex_cache_memory_requests_total{name=\"chunks-attributes-cache\",cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", + "format": "time_series", + "legendFormat": "hit ratio", + "legendLink": null + } + ], + "title": "Chunks attributes in-memory cache hit ratio", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "owner:team-atlas", + "topic:observability", + "component:mimir" + ], + 
"templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [], + "query": "label_values(cortex_build_info, cluster_id)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "namespace", + "multi": true, + "name": "namespace", + "options": [], + "query": "label_values(cortex_build_info{cluster_id=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Queries", + "uid": "mimir-b3abe8d5c040395cc36615cb4334c92d", + "version": 0 +} diff --git a/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-reads-networking.json b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-reads-networking.json index 654254ef..a14cbb4a 100644 --- a/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-reads-networking.json +++ 
b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-reads-networking.json @@ -21,7 +21,9 @@ "includeVars": true, "keepTime": true, "tags": [ - "mimir" + "owner:team-atlas", + "topic:observability", + "component:mimir" ], "targetBlank": false, "title": "Mimir dashboards", @@ -73,7 +75,7 @@ "span": 3, "targets": [ { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -121,7 +123,7 @@ "span": 3, "targets": [ { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -172,13 +174,13 @@ "span": 3, "targets": [ { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", + "expr": "avg(cortex_inflight_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", "format": "time_series", "legendFormat": "avg", "legendLink": null }, { - "expr": 
"max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", + "expr": "max(cortex_inflight_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", "format": "time_series", "legendFormat": "highest", "legendLink": null @@ -229,19 +231,19 @@ "span": 3, "targets": [ { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}))", + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}))", "format": "time_series", "legendFormat": "avg", "legendLink": null }, { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}))", + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"}))", "format": "time_series", "legendFormat": "highest", "legendLink": null }, { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", + "expr": "min(cortex_tcp_connections_limit{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read).*\"})", "format": "time_series", "legendFormat": "limit", "legendLink": null @@ -301,7 +303,7 @@ "span": 3, "targets": [ { - "expr": "sum by(pod) 
(rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -349,7 +351,7 @@ "span": 3, "targets": [ { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -400,13 +402,13 @@ "span": 3, "targets": [ { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"})", + "expr": "avg(cortex_inflight_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"})", "format": "time_series", "legendFormat": "avg", "legendLink": null }, { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"})", + "expr": "max(cortex_inflight_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"})", "format": "time_series", "legendFormat": "highest", "legendLink": null @@ -457,19 +459,19 @@ "span": 3, "targets": [ { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"}))", + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"}))", "format": "time_series", "legendFormat": "avg", "legendLink": null }, 
{ - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"}))", + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"}))", "format": "time_series", "legendFormat": "highest", "legendLink": null }, { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"})", + "expr": "min(cortex_tcp_connections_limit{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-frontend.*\"})", "format": "time_series", "legendFormat": "limit", "legendLink": null @@ -529,7 +531,7 @@ "span": 3, "targets": [ { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -577,7 +579,7 @@ "span": 3, "targets": [ { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -628,13 +630,13 @@ "span": 3, "targets": [ { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"})", + "expr": "avg(cortex_inflight_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"})", "format": "time_series", 
"legendFormat": "avg", "legendLink": null }, { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"})", + "expr": "max(cortex_inflight_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"})", "format": "time_series", "legendFormat": "highest", "legendLink": null @@ -685,19 +687,19 @@ "span": 3, "targets": [ { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"}))", + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"}))", "format": "time_series", "legendFormat": "avg", "legendLink": null }, { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"}))", + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"}))", "format": "time_series", "legendFormat": "highest", "legendLink": null }, { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"})", + "expr": "min(cortex_tcp_connections_limit{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?query-scheduler.*\"})", "format": "time_series", "legendFormat": "limit", "legendLink": null @@ -757,7 +759,7 @@ "span": 3, "targets": [ { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -805,7 +807,7 @@ "span": 3, "targets": 
[ { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -856,13 +858,13 @@ "span": 3, "targets": [ { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"})", + "expr": "avg(cortex_inflight_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"})", "format": "time_series", "legendFormat": "avg", "legendLink": null }, { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"})", + "expr": "max(cortex_inflight_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"})", "format": "time_series", "legendFormat": "highest", "legendLink": null @@ -913,19 +915,19 @@ "span": 3, "targets": [ { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"}))", + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"}))", "format": "time_series", "legendFormat": "avg", "legendLink": null }, { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"}))", + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"}))", "format": "time_series", "legendFormat": "highest", "legendLink": null }, { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"})", + "expr": 
"min(cortex_tcp_connections_limit{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?querier.*\"})", "format": "time_series", "legendFormat": "limit", "legendLink": null @@ -985,7 +987,7 @@ "span": 3, "targets": [ { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -1033,7 +1035,7 @@ "span": 3, "targets": [ { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -1084,13 +1086,13 @@ "span": 3, "targets": [ { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"})", + "expr": "avg(cortex_inflight_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"})", "format": "time_series", "legendFormat": "avg", "legendLink": null }, { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"})", + "expr": "max(cortex_inflight_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"})", "format": "time_series", "legendFormat": "highest", "legendLink": null @@ -1141,19 +1143,19 @@ "span": 3, "targets": [ { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", 
namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"}))", + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"}))", "format": "time_series", "legendFormat": "avg", "legendLink": null }, { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"}))", + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"}))", "format": "time_series", "legendFormat": "highest", "legendLink": null }, { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"})", + "expr": "min(cortex_tcp_connections_limit{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?store-gateway.*\"})", "format": "time_series", "legendFormat": "limit", "legendLink": null @@ -1213,7 +1215,7 @@ "span": 3, "targets": [ { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -1261,7 +1263,7 @@ "span": 3, "targets": [ { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -1312,13 +1314,13 @@ "span": 3, "targets": [ { - "expr": 
"avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"})", + "expr": "avg(cortex_inflight_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"})", "format": "time_series", "legendFormat": "avg", "legendLink": null }, { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"})", + "expr": "max(cortex_inflight_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"})", "format": "time_series", "legendFormat": "highest", "legendLink": null @@ -1369,19 +1371,19 @@ "span": 3, "targets": [ { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"}))", + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"}))", "format": "time_series", "legendFormat": "avg", "legendLink": null }, { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"}))", + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"}))", "format": "time_series", "legendFormat": "highest", "legendLink": null }, { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"})", + "expr": "min(cortex_tcp_connections_limit{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler.*\"})", "format": "time_series", "legendFormat": "limit", "legendLink": null @@ -1402,7 +1404,9 @@ "schemaVersion": 14, "style": "dark", "tags": [ - "mimir" + "owner:team-atlas", + "topic:observability", + "component:mimir" ], "templating": { "list": [ @@ -1433,7 +1437,7 @@ "multi": false, "name": "cluster", "options": [], - "query": "label_values(cortex_build_info, cluster)", + "query": 
"label_values(cortex_build_info, cluster_id)", "refresh": 1, "regex": "", "sort": 1, @@ -1456,7 +1460,7 @@ "multi": false, "name": "namespace", "options": [], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "query": "label_values(cortex_build_info{cluster_id=~\"$cluster\"}, namespace)", "refresh": 1, "regex": "", "sort": 1, @@ -1499,6 +1503,6 @@ }, "timezone": "utc", "title": "Mimir / Reads networking", - "uid": "54b2a0a4748b3bd1aefa92ce5559a1c2", + "uid": "mimir-54b2a0a4748b3bd1aefa92ce5559a1c2", "version": 0 } diff --git a/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-reads-resources.json b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-reads-resources.json index 2a51eded..5662e660 100644 --- a/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-reads-resources.json +++ b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-reads-resources.json @@ -21,7 +21,9 @@ "includeVars": true, "keepTime": true, "tags": [ - "mimir" + "owner:team-atlas", + "topic:observability", + "component:mimir" ], "targetBlank": false, "title": "Mimir dashboards", @@ -73,7 +75,7 @@ "span": 4, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -121,7 +123,7 @@ "span": 4, "targets": [ { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"})", 
+ "expr": "max by(pod) (container_memory_working_set_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -169,7 +171,7 @@ "span": 4, "targets": [ { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"})", + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend|querier|ruler-query-frontend|ruler-querier|mimir-read\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -280,19 +282,19 @@ "span": 4, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"})", + "expr": "min(container_spec_cpu_quota{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"} / container_spec_cpu_period{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"})", "format": "time_series", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\",resource=\"cpu\"})", + "expr": "min(kube_pod_container_resource_requests{cluster_id=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"query-frontend\",resource=\"cpu\"})", "format": "time_series", "legendFormat": "request", "legendLink": null @@ -391,19 +393,19 @@ "span": 4, "targets": [ { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"})", + "expr": "max by(pod) (container_memory_working_set_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"} > 0)", "format": "time_series", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\",resource=\"memory\"})", + "expr": "min(kube_pod_container_resource_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\",resource=\"memory\"})", "format": "time_series", "legendFormat": "request", "legendLink": null @@ -451,7 +453,7 @@ "span": 4, "targets": [ { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"})", + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-frontend\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -562,19 +564,19 @@ "span": 4, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster_id=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"query-scheduler\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"})", + "expr": "min(container_spec_cpu_quota{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"} / container_spec_cpu_period{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"})", "format": "time_series", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\",resource=\"cpu\"})", + "expr": "min(kube_pod_container_resource_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\",resource=\"cpu\"})", "format": "time_series", "legendFormat": "request", "legendLink": null @@ -673,19 +675,19 @@ "span": 4, "targets": [ { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"})", + "expr": "max by(pod) (container_memory_working_set_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"} > 0)", "format": "time_series", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"query-scheduler\",resource=\"memory\"})", + "expr": "min(kube_pod_container_resource_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\",resource=\"memory\"})", "format": "time_series", "legendFormat": "request", "legendLink": null @@ -733,7 +735,7 @@ "span": 4, "targets": [ { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"})", + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"query-scheduler\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -844,19 +846,19 @@ "span": 4, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"})", + "expr": "min(container_spec_cpu_quota{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"} / container_spec_cpu_period{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"})", "format": "time_series", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\",resource=\"cpu\"})", + "expr": "min(kube_pod_container_resource_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\",resource=\"cpu\"})", "format": "time_series", "legendFormat": 
"request", "legendLink": null @@ -955,19 +957,19 @@ "span": 4, "targets": [ { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"})", + "expr": "max by(pod) (container_memory_working_set_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"} > 0)", "format": "time_series", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\",resource=\"memory\"})", + "expr": "min(kube_pod_container_resource_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\",resource=\"memory\"})", "format": "time_series", "legendFormat": "request", "legendLink": null @@ -1015,7 +1017,7 @@ "span": 4, "targets": [ { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"})", + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"querier\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -1126,19 +1128,19 @@ "span": 6, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": 
"min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", + "expr": "min(container_spec_cpu_quota{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} / container_spec_cpu_period{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", "format": "time_series", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"cpu\"})", + "expr": "min(kube_pod_container_resource_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"cpu\"})", "format": "time_series", "legendFormat": "request", "legendLink": null @@ -1186,7 +1188,7 @@ "span": 6, "targets": [ { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -1297,19 +1299,19 @@ "span": 6, "targets": [ { - "expr": "max by(pod) (container_memory_rss{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", + "expr": "max by(pod) (container_memory_rss{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} > 0)", "format": "time_series", "legendFormat": "limit", "legendLink": null }, { - "expr": 
"min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"memory\"})", + "expr": "min(kube_pod_container_resource_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"memory\"})", "format": "time_series", "legendFormat": "request", "legendLink": null @@ -1408,19 +1410,19 @@ "span": 6, "targets": [ { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", + "expr": "max by(pod) (container_memory_working_set_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} > 0)", "format": "time_series", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"memory\"})", + "expr": "min(kube_pod_container_resource_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"memory\"})", "format": "time_series", "legendFormat": "request", "legendLink": null @@ -1480,7 +1482,7 @@ "span": 6, "targets": [ { - "expr": "sum by(pod) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"})", + "expr": "sum by(pod) (cortex_prometheus_rule_group_rules{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -1579,19 +1581,19 @@ "span": 6, "targets": [ { - "expr": "sum by(pod) 
(rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"})", + "expr": "min(container_spec_cpu_quota{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"} / container_spec_cpu_period{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"})", "format": "time_series", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\",resource=\"cpu\"})", + "expr": "min(kube_pod_container_resource_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\",resource=\"cpu\"})", "format": "time_series", "legendFormat": "request", "legendLink": null @@ -1702,19 +1704,19 @@ "span": 6, "targets": [ { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"})", + "expr": "max by(pod) (container_memory_working_set_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"} > 0)", "format": "time_series", "legendFormat": "limit", "legendLink": null }, { - "expr": 
"min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\",resource=\"memory\"})", + "expr": "min(kube_pod_container_resource_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\",resource=\"memory\"})", "format": "time_series", "legendFormat": "request", "legendLink": null @@ -1762,7 +1764,7 @@ "span": 6, "targets": [ { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"})", + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -1873,19 +1875,19 @@ "span": 6, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"})", + "expr": "min(container_spec_cpu_quota{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"} / container_spec_cpu_period{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"})", "format": "time_series", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\",resource=\"cpu\"})", + "expr": "min(kube_pod_container_resource_requests{cluster_id=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"store-gateway\",resource=\"cpu\"})", "format": "time_series", "legendFormat": "request", "legendLink": null @@ -1933,7 +1935,7 @@ "span": 6, "targets": [ { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"})", + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -2044,19 +2046,19 @@ "span": 6, "targets": [ { - "expr": "max by(pod) (container_memory_rss{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"})", + "expr": "max by(pod) (container_memory_rss{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"} > 0)", "format": "time_series", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\",resource=\"memory\"})", + "expr": "min(kube_pod_container_resource_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\",resource=\"memory\"})", "format": "time_series", "legendFormat": "request", "legendLink": null @@ -2155,19 +2157,19 @@ "span": 6, "targets": [ { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"})", + "expr": "max by(pod) (container_memory_working_set_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"})", "format": 
"time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\"} > 0)", "format": "time_series", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\",resource=\"memory\"})", + "expr": "min(kube_pod_container_resource_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"store-gateway\",resource=\"memory\"})", "format": "time_series", "legendFormat": "request", "legendLink": null @@ -2227,7 +2229,7 @@ "span": 4, "targets": [ { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"store-gateway\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", + "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster_id=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"store-gateway\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", "format": "time_series", "legendFormat": "{{pod}} - {{device}}", "legendLink": null @@ -2275,7 +2277,7 @@ "span": 4, "targets": [ { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count 
by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"store-gateway\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", + "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster_id=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"store-gateway\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", "format": "time_series", "legendFormat": "{{pod}} - {{device}}", "legendLink": null @@ -2326,7 +2328,7 @@ "span": 4, "targets": [ { - "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(store-gateway).*\"\n }\n)\n", + "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster_id=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(store-gateway).*\"\n }\n)\n", "format": "time_series", "legendFormat": "{{persistentvolumeclaim}}", "legendLink": null @@ -2347,7 +2349,9 @@ "schemaVersion": 14, "style": "dark", "tags": [ - "mimir" + "owner:team-atlas", + "topic:observability", + "component:mimir" ], "templating": { "list": [ @@ -2378,7 +2382,7 @@ "multi": false, "name": "cluster", "options": [], - "query": 
"label_values(cortex_build_info, cluster)", + "query": "label_values(cortex_build_info, cluster_id)", "refresh": 1, "regex": "", "sort": 1, @@ -2401,7 +2405,7 @@ "multi": false, "name": "namespace", "options": [], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "query": "label_values(cortex_build_info{cluster_id=~\"$cluster\"}, namespace)", "refresh": 1, "regex": "", "sort": 1, @@ -2444,6 +2448,6 @@ }, "timezone": "utc", "title": "Mimir / Reads resources", - "uid": "cc86fd5aa9301c6528986572ad974db9", + "uid": "mimir-cc86fd5aa9301c6528986572ad974db9", "version": 0 } diff --git a/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-reads.json b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-reads.json index ffd5f025..64050e59 100644 --- a/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-reads.json +++ b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-reads.json @@ -21,7 +21,9 @@ "includeVars": true, "keepTime": true, "tags": [ - "mimir" + "owner:team-atlas", + "topic:observability", + "component:mimir" ], "targetBlank": false, "title": "Mimir dashboards", @@ -91,7 +93,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(\n rate(\n cortex_request_duration_seconds_count{\n cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",\n route=~\"(prometheus|api_prom)_api_v1_query\"\n }[$__rate_interval]\n )\n or\n rate(\n cortex_prometheus_rule_evaluations_total{\n cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"\n }[$__rate_interval]\n )\n)\n", + "expr": "sum(\n rate(\n cortex_request_duration_seconds_count{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",\n route=~\"(prometheus|api_prom)_api_v1_query\"\n }[$__rate_interval]\n )\n or\n rate(\n cortex_prometheus_rule_evaluations_total{\n 
cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"\n }[$__rate_interval]\n )\n)\n", "format": "time_series", "instant": true, "refId": "A" @@ -167,7 +169,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_query_range\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_query_range\"}[$__rate_interval]))", "format": "time_series", "instant": true, "refId": "A" @@ -243,7 +245,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_labels\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_labels\"}[$__rate_interval]))", "format": "time_series", "instant": true, "refId": "A" @@ -319,7 +321,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_label_name_values\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_label_name_values\"}[$__rate_interval]))", "format": "time_series", "instant": true, "refId": "A" @@ -395,7 +397,7 @@ "steppedLine": false, "targets": [ { - "expr": 
"sum(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_series\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\",route=~\"(prometheus|api_prom)_api_v1_series\"}[$__rate_interval]))", "format": "time_series", "instant": true, "refId": "A" @@ -624,7 +626,7 @@ "span": 4, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "legendFormat": "{{status}}", "refId": "A" @@ -673,19 +675,19 @@ "span": 4, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", + "expr": "histogram_quantile(0.99, sum by (le) (cluster_id_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", "format": "time_series", "legendFormat": "99th 
percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", + "expr": "histogram_quantile(0.50, sum by (le) (cluster_id_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", "format": "time_series", "legendFormat": "50th percentile", "refId": "B" }, { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})", + "expr": "1e3 * sum(cluster_id_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}) / sum(cluster_id_job_route:cortex_request_duration_seconds_count:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -735,7 +737,7 @@ "targets": [ { "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval])))", + "expr": "histogram_quantile(0.99, sum by(le, pod) 
(rate(cortex_request_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval])))", "format": "time_series", "legendFormat": "", "legendLink": null @@ -932,7 +934,7 @@ "span": 4, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "legendFormat": "{{status}}", "refId": "A" @@ -982,19 +984,19 @@ "span": 4, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster_id=~\"$cluster\", 
job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -1063,7 +1065,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(min_over_time(cortex_query_scheduler_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__interval]))", + "expr": "sum(min_over_time(cortex_query_scheduler_queue_length{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__interval]))", "format": "time_series", "legendFormat": "Queue length", "legendLink": null @@ -1125,7 +1127,7 @@ "span": 4, "targets": [ { - "expr": "label_replace(histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le, additional_queue_dimensions)) * 1e3, \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", + "expr": "label_replace(histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster_id=~\"$cluster\", 
job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le, additional_queue_dimensions)) * 1e3, \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", "format": "time_series", "legendFormat": "99th Percentile: {{ additional_queue_dimensions }}", "refId": "A" @@ -1175,7 +1177,7 @@ "span": 4, "targets": [ { - "expr": "label_replace(histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le, additional_queue_dimensions)) * 1e3, \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", + "expr": "label_replace(histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (le, additional_queue_dimensions)) * 1e3, \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", "format": "time_series", "legendFormat": "50th Percentile: {{ additional_queue_dimensions }}", "refId": "A" @@ -1225,7 +1227,7 @@ "span": 4, "targets": [ { - "expr": "label_replace(sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (additional_queue_dimensions) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (additional_queue_dimensions), \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", + "expr": "label_replace(sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (additional_queue_dimensions) * 1e3 / 
sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\"}[$__rate_interval])) by (additional_queue_dimensions), \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", "format": "time_series", "legendFormat": "Average: {{ additional_queue_dimensions }}", "refId": "C" @@ -1242,6 +1244,130 @@ "title": "Query-scheduler Latency (Time in Queue) Breakout by Additional Queue Dimensions", "titleSize": "h6" }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ops" + }, + "overrides": [] + }, + "id": 16, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum (\n rate(thanos_memcached_operations_total{name=\"frontend-cache\", cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_operations_total{name=\"frontend-cache\", cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\"}[$__rate_interval])\n)\n", + "format": "time_series", + "legendFormat": "Requests/s", + "legendLink": null + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "noValue": 0, + 
"thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "id": 17, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n) by (le)) * 1e3\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n) by (le)) * 1e3\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n) * 1e3\n/\nsum(\n # Backwards compatibility\n 
rate(thanos_memcached_operation_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", name=\"frontend-cache\"}[$__rate_interval])\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Cache – query results", + "titleSize": "h6" + }, { "collapse": false, "height": "250px", @@ -1421,7 +1547,7 @@ "span": 4, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_querier_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_querier_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "legendFormat": "{{status}}", "refId": "A" @@ -1470,19 +1596,19 @@ "span": 4, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", + "expr": "histogram_quantile(0.99, sum by (le) 
(cluster_id_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", "format": "time_series", "legendFormat": "99th percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", + "expr": "histogram_quantile(0.50, sum by (le) (cluster_id_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", "format": "time_series", "legendFormat": "50th percentile", "refId": "B" }, { - "expr": "1e3 * sum(cluster_job_route:cortex_querier_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}) / sum(cluster_job_route:cortex_querier_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})", + "expr": "1e3 * sum(cluster_id_job_route:cortex_querier_request_duration_seconds_sum:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}) / sum(cluster_id_job_route:cortex_querier_request_duration_seconds_count:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -1532,7 +1658,7 @@ "targets": [ { "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) 
(rate(cortex_querier_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval])))", + "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_querier_request_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval])))", "format": "time_series", "legendFormat": "", "legendLink": null @@ -1728,7 +1854,7 @@ "span": 4, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",route=~\"/cortex.Ingester/(QueryStream|QueryExemplars|LabelValues|LabelNames|UserStats|AllUserStats|MetricsForLabelMatchers|MetricsMetadata|LabelNamesAndValues|LabelValuesCardinality|ActiveSeries)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "legendFormat": "{{status}}", "refId": "A" @@ -1777,19 +1903,19 @@ "span": 4, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", 
route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"})) * 1e3", + "expr": "histogram_quantile(0.99, sum by (le) (cluster_id_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/(QueryStream|QueryExemplars|LabelValues|LabelNames|UserStats|AllUserStats|MetricsForLabelMatchers|MetricsMetadata|LabelNamesAndValues|LabelValuesCardinality|ActiveSeries)\"})) * 1e3", "format": "time_series", "legendFormat": "99th percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"})) * 1e3", + "expr": "histogram_quantile(0.50, sum by (le) (cluster_id_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/(QueryStream|QueryExemplars|LabelValues|LabelNames|UserStats|AllUserStats|MetricsForLabelMatchers|MetricsMetadata|LabelNamesAndValues|LabelValuesCardinality|ActiveSeries)\"})) * 1e3", "format": "time_series", "legendFormat": "50th percentile", "refId": "B" }, { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", 
route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"})", + "expr": "1e3 * sum(cluster_id_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/(QueryStream|QueryExemplars|LabelValues|LabelNames|UserStats|AllUserStats|MetricsForLabelMatchers|MetricsMetadata|LabelNamesAndValues|LabelValuesCardinality|ActiveSeries)\"}) / sum(cluster_id_job_route:cortex_request_duration_seconds_count:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/(QueryStream|QueryExemplars|LabelValues|LabelNames|UserStats|AllUserStats|MetricsForLabelMatchers|MetricsMetadata|LabelNamesAndValues|LabelValuesCardinality|ActiveSeries)\"})", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -1839,7 +1965,7 @@ "targets": [ { "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata\"}[$__rate_interval])))", + "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=~\"/cortex.Ingester/(QueryStream|QueryExemplars|LabelValues|LabelNames|UserStats|AllUserStats|MetricsForLabelMatchers|MetricsMetadata|LabelNamesAndValues|LabelValuesCardinality|ActiveSeries)\"}[$__rate_interval])))", "format": "time_series", "legendFormat": "", "legendLink": null @@ -2035,7 +2161,7 @@ "span": 4, "targets": [ { - "expr": "sum by (status) (\n 
label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",route=~\"/gatewaypb.StoreGateway/.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",route=~\"/gatewaypb.StoreGateway/.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "legendFormat": "{{status}}", "refId": "A" @@ -2084,19 +2210,19 @@ "span": 4, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"})) * 1e3", + "expr": "histogram_quantile(0.99, sum by (le) (cluster_id_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"})) * 1e3", "format": "time_series", "legendFormat": "99th percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"})) * 1e3", + "expr": "histogram_quantile(0.50, sum by (le) (cluster_id_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"})) * 1e3", "format": "time_series", 
"legendFormat": "50th percentile", "refId": "B" }, { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"})", + "expr": "1e3 * sum(cluster_id_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"}) / sum(cluster_id_job_route:cortex_request_duration_seconds_count:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"})", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -2146,7 +2272,7 @@ "targets": [ { "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"}[$__rate_interval])))", + "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", route=~\"/gatewaypb.StoreGateway/.*\"}[$__rate_interval])))", "format": "time_series", "legendFormat": "", "legendLink": null @@ -2169,18 +2295,19 @@ "panels": [ { "datasource": "$datasource", + "description": "### Replicas\nThe maximum, and current number of querier replicas.\nPlease note that the current number of replicas can still show 1 replica even when scaled to 0.\nSince HPA never reports 0 replicas, the query will report 0 only if the HPA is not active.\n\n", "fieldConfig": { "defaults": { 
"custom": { "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, + "fillOpacity": 1, + "lineWidth": 1, "pointSize": 5, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", - "mode": "normal" + "mode": "none" } }, "min": 0, @@ -2188,147 +2315,60 @@ "mode": "absolute", "steps": [] }, - "unit": "reqps" + "unit": "short" }, "overrides": [ { "matcher": { - "id": "byName", - "options": "1xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EAB839", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "2xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "3xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#6ED0E0", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "4xx" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#EF843C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "5xx" + "id": "byRegexp", + "options": "/Max .+/" }, "properties": [ { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "OK" - }, - "properties": [ + "id": "custom.fillOpacity", + "value": 0 + }, { - "id": "color", + "id": "custom.lineStyle", "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" + "fill": "dash" } } ] }, { "matcher": { - "id": "byName", - "options": "cancel" + "id": "byRegexp", + "options": "/Current .+/" }, "properties": [ { - "id": "color", - "value": { - "fixedColor": "#A9A9A9", - "mode": "fixed" - } + "id": "custom.fillOpacity", + "value": 0 } ] }, { "matcher": { - "id": "byName", - "options": "error" + "id": "byRegexp", + "options": "/Min .+/" }, "properties": [ { - "id": "color", - "value": { - "fixedColor": "#E24D42", - "mode": "fixed" - } - } - ] - }, - { - 
"matcher": { - "id": "byName", - "options": "success" - }, - "properties": [ + "id": "custom.fillOpacity", + "value": 0 + }, { - "id": "color", + "id": "custom.lineStyle", "value": { - "fixedColor": "#7EB26D", - "mode": "fixed" + "fill": "dash" } } ] } ] }, - "id": 30, + "id": 27, "links": [], "options": { "legend": { @@ -2339,20 +2379,33 @@ "sort": "none" } }, - "span": 6, + "span": 4, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "max by (scaletargetref_name) (\n kube_horizontalpodautoscaler_spec_max_replicas{cluster_id=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-querier\"}\n # Add the scaletargetref_name label which is more readable than \"kube-hpa-...\"\n + on (cluster_id, namespace, horizontalpodautoscaler) group_left (scaletargetref_name)\n 0*kube_horizontalpodautoscaler_info{cluster_id=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-querier\"}\n)\n", "format": "time_series", - "legendFormat": "{{status}}", - "refId": "A" + "legendFormat": "Max {{ scaletargetref_name }}", + "legendLink": null + }, + { + "expr": "max by (scaletargetref_name) (\n kube_horizontalpodautoscaler_status_current_replicas{cluster_id=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-querier\"}\n # HPA doesn't go to 0 replicas, so we multiply by 0 if the HPA is not active.\n * on (cluster_id, namespace, horizontalpodautoscaler)\n kube_horizontalpodautoscaler_status_condition{cluster_id=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-querier\", condition=\"ScalingActive\", status=\"true\"}\n # Add the scaletargetref_name 
label which is more readable than \"kube-hpa-...\"\n + on (cluster_id, namespace, horizontalpodautoscaler) group_left (scaletargetref_name)\n 0*kube_horizontalpodautoscaler_info{cluster_id=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-querier\"}\n)\n", + "format": "time_series", + "legendFormat": "Current {{ scaletargetref_name }}", + "legendLink": null + }, + { + "expr": "max by (scaletargetref_name) (\n kube_horizontalpodautoscaler_spec_min_replicas{cluster_id=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-querier\"}\n # Add the scaletargetref_name label which is more readable than \"kube-hpa-...\"\n + on (cluster_id, namespace, horizontalpodautoscaler) group_left (scaletargetref_name)\n 0*kube_horizontalpodautoscaler_info{cluster_id=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-querier\"}\n)\n", + "format": "time_series", + "legendFormat": "Min {{ scaletargetref_name }}", + "legendLink": null } ], - "title": "Requests / sec", + "title": "Replicas", "type": "timeseries" }, { "datasource": "$datasource", + "description": "### Scaling metric (desired replicas)\nThis panel shows the result scaling metric exposed by KEDA divided by the target/threshold used.\nIt should represent the desired number of replicas, ignoring the min/max constraints which are applied later.\n\n", "fieldConfig": { "defaults": { "custom": { @@ -2372,13 +2425,12 @@ "mode": "absolute", "steps": [] }, - "unit": "ms" + "unit": "short" }, "overrides": [] }, - "id": 31, + "id": 28, "links": [], - "nullPointMode": "null as zero", "options": { "legend": { "showLegend": true @@ -2388,54 +2440,1037 @@ "sort": "none" } }, - "span": 6, + "span": 4, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval])) by (le)) * 1e3", - 
"format": "time_series", - "legendFormat": "99th Percentile", - "refId": "A" - }, - { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval])) by (le)) * 1e3", - "format": "time_series", - "legendFormat": "50th Percentile", - "refId": "B" - }, - { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval]))", + "expr": "sum by (scaler) (\n label_replace(\n keda_scaler_metrics_value{cluster_id=~\"$cluster\", exported_namespace=~\"$namespace\"},\n \"namespace\", \"$1\", \"exported_namespace\", \"(.*)\"\n )\n /\n on(cluster_id, namespace, scaledObject, metric) group_left\n label_replace(label_replace(\n kube_horizontalpodautoscaler_spec_target_metric{cluster_id=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-querier\"},\n \"metric\", \"$1\", \"metric_name\", \"(.+)\"\n ), \"scaledObject\", \"$1\", \"horizontalpodautoscaler\", \"keda-hpa-(.*)\")\n)\n", "format": "time_series", - "legendFormat": "Average", - "refId": "C" + "legendFormat": "{{ scaler }}", + "legendLink": null + } + ], + "title": "Scaling metric (desired replicas)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Autoscaler failures rate\nThe rate of failures in the KEDA custom metrics API server. 
Whenever an error occurs, the KEDA custom\nmetrics server is unable to query the scaling metric from Prometheus so the autoscaler wouldn't work properly.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 29, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(cluster_id, namespace, scaler, metric, scaledObject) (\n label_replace(\n rate(keda_scaler_errors[$__rate_interval]),\n \"namespace\", \"$1\", \"exported_namespace\", \"(.+)\"\n )\n) +\non(cluster_id, namespace, metric, scaledObject) group_left\nlabel_replace(\n label_replace(\n kube_horizontalpodautoscaler_spec_target_metric{cluster_id=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-querier\"} * 0,\n \"scaledObject\", \"$1\", \"horizontalpodautoscaler\", \"keda-hpa-(.*)\"\n ),\n \"metric\", \"$1\", \"metric_name\", \"(.+)\"\n)\n", + "format": "time_series", + "legendFormat": "{{scaler}} failures", + "legendLink": null + } + ], + "title": "Autoscaler failures rate", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Querier - autoscaling", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + 
"unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 30, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster_id=~\"$cluster\", 
job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "id": 31, + "links": [], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster_id=~\"$cluster\", 
job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\", kv_name=~\"store-gateway\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Store-gateway – key-value store for store-gateways ring", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ops" + }, + "overrides": [] + }, + "id": 32, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(operation) (\n # Backwards compatibility\n rate(\n thanos_memcached_operations_total{\n component=\"store-gateway\",\n name=\"index-cache\",\n cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"\n }[$__rate_interval]\n )\n or ignoring(backend)\n rate(\n thanos_cache_operations_total{\n component=\"store-gateway\",\n name=\"index-cache\",\n cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"\n }[$__rate_interval]\n )\n)\n", + "format": "time_series", + "legendFormat": "{{operation}}", + "legendLink": null + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": 
"$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "noValue": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "id": 33, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": 
"B" + }, + { + "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n) * 1e3\n/\nsum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_count{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"index-cache\"\n}\n[$__rate_interval])\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency (getmulti)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Hit ratio\nEven if you do not set up memcached for the blocks index cache, you will still see data in this panel because the store-gateway by default has an\nin-memory blocks index cache.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "id": 34, + "links": [], + "options": { + "legend": { + "showLegend": 
true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(item_type) (\n rate(\n thanos_store_index_cache_hits_total{\n component=\"store-gateway\",\n cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"\n }[$__rate_interval]\n )\n)\n/\nsum by(item_type) (\n rate(\n thanos_store_index_cache_requests_total{\n component=\"store-gateway\",\n cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\"\n }[$__rate_interval]\n )\n)\n", + "format": "time_series", + "legendFormat": "{{item_type}}", + "legendLink": null + } + ], + "title": "Hit ratio", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memcached – block index cache (store-gateway accesses)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ops" + }, + "overrides": [] + }, + "id": 35, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(operation) (\n # Backwards compatibility\n rate(thanos_memcached_operations_total{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_operations_total{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n 
component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n)\n", + "format": "time_series", + "legendFormat": "{{operation}}", + "legendLink": null + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "noValue": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "id": 36, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster_id=~\"$cluster\", 
job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n) * 1e3\n/\nsum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_count{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n}\n[$__rate_interval])\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency (getmulti)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + 
"overrides": [] + }, + "id": 37, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(\n # Backwards compatibility\n rate(thanos_cache_memcached_hits_total{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_hits_total{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n)\n/\nsum(\n # Backwards compatibility\n rate(thanos_cache_memcached_requests_total{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_requests_total{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"chunks-cache\"\n }[$__rate_interval])\n)\n", + "format": "time_series", + "legendFormat": "items", + "legendLink": null + } + ], + "title": "Hit ratio", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memcached – chunks cache (store-gateway accesses)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ops" + }, + "overrides": [] + }, + "id": 38, + "links": [], + "options": { + "legend": { + 
"showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(operation) (\n # Backwards compatibility\n rate(thanos_memcached_operations_total{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_operations_total{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n", + "format": "time_series", + "legendFormat": "{{operation}}", + "legendLink": null + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "noValue": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "id": 39, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", + 
"format": "time_series", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) * 1e3\n/\nsum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_count{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n operation=\"getmulti\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n)\n", 
+ "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C" } ], - "title": "Latency", - "type": "timeseries", - "yaxes": [ + "title": "Latency (getmulti)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "id": 40, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, + "expr": "sum(\n # Backwards compatibility\n rate(thanos_cache_memcached_hits_total{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_hits_total{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n/\nsum(\n # Backwards compatibility\n rate(thanos_cache_memcached_requests_total{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_requests_total{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((store-gateway.*|cortex|mimir|mimir-backend.*))\",\n component=\"store-gateway\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n", + "format": "time_series", + "legendFormat": "items", + "legendLink": null + } + ], + "title": "Hit ratio", + "type": "timeseries" + } + ], + "repeat": null, + 
"repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Memcached – metadata cache (store-gateway accesses)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, "min": 0, - "show": true + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ops" }, + "overrides": [] + }, + "id": 41, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": false + "expr": "sum by(operation) (\n # Backwards compatibility\n rate(thanos_memcached_operations_total{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or ignoring(backend)\n rate(thanos_cache_operations_total{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n", + "format": "time_series", + "legendFormat": "{{operation}}", + "legendLink": null } - ] + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "noValue": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "id": 42, + "links": [], + "options": { + "legend": { + 
"showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_bucket{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_bucket{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) by (le)) * 1e3\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_sum{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_sum{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n 
component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n) * 1e3\n/\nsum(\n # Backwards compatibility\n rate(thanos_memcached_operation_duration_seconds_count{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n or\n rate(thanos_cache_operation_duration_seconds_count{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n operation=\"getmulti\",\n component=\"querier\",\n name=\"metadata-cache\"\n}\n[$__rate_interval])\n)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency (getmulti)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "id": 43, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(\n # Backwards compatibility\n rate(thanos_cache_memcached_hits_total{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_hits_total{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n/\nsum(\n # Backwards compatibility\n rate(thanos_cache_memcached_requests_total{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n 
component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n or\n rate(thanos_cache_requests_total{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((querier.*|cortex|mimir|mimir-read.*))\",\n component=\"querier\",\n name=\"metadata-cache\"\n }[$__rate_interval])\n)\n", + "format": "time_series", + "legendFormat": "items", + "legendLink": null + } + ], + "title": "Hit ratio", + "type": "timeseries" } ], "repeat": null, "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "Store-gateway – key-value store for store-gateways ring", + "title": "Memcached – metadata cache (querier accesses)", "titleSize": "h6" }, { @@ -2481,7 +3516,7 @@ "span": 3, "targets": [ { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\"}[$__rate_interval]))", + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{operation}}", "legendLink": null @@ -2514,7 +3549,7 @@ "span": 3, "targets": [ { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\"}[$__rate_interval])) >= 0", + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\"}[$__rate_interval])) >= 0", "format": "time_series", "legendFormat": "{{operation}}", "legendLink": null @@ -2563,19 +3598,19 @@ "span": 
3, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"attributes\"}[$__rate_interval]))", + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"attributes\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", 
"refId": "C" @@ -2642,19 +3677,19 @@ "span": 3, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"exists\"}[$__rate_interval]))", + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"exists\"}[$__rate_interval]))", "format": "time_series", "legendFormat": 
"Average", "refId": "C" @@ -2733,19 +3768,19 @@ "span": 3, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get\"}[$__rate_interval]))", + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", 
"refId": "C" @@ -2812,19 +3847,19 @@ "span": 3, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get_range\"}[$__rate_interval]))", + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"get_range\"}[$__rate_interval]))", "format": 
"time_series", "legendFormat": "Average", "refId": "C" @@ -2891,19 +3926,19 @@ "span": 3, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"upload\"}[$__rate_interval]))", + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"upload\"}[$__rate_interval]))", 
"format": "time_series", "legendFormat": "Average", "refId": "C" @@ -2970,19 +4005,19 @@ "span": 3, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"delete\"}[$__rate_interval]))", + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"store-gateway\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster_id=~\"$cluster\", 
namespace=~\"$namespace\",component=\"store-gateway\",operation=\"delete\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -3060,7 +4095,7 @@ "span": 3, "targets": [ { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\"}[$__rate_interval]))", + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{operation}}", "legendLink": null @@ -3093,7 +4128,7 @@ "span": 3, "targets": [ { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\"}[$__rate_interval])) >= 0", + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\"}[$__rate_interval])) >= 0", "format": "time_series", "legendFormat": "{{operation}}", "legendLink": null @@ -3142,19 +4177,19 @@ "span": 3, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", 
"legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"attributes\"}[$__rate_interval]))", + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"attributes\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -3221,19 +4256,19 @@ "span": 3, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", 
"legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"exists\"}[$__rate_interval]))", + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"exists\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -3312,19 +4347,19 @@ "span": 3, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "99th Percentile", "refId": 
"A" }, { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get\"}[$__rate_interval]))", + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -3391,19 +4426,19 @@ "span": 3, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, 
sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get_range\"}[$__rate_interval]))", + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"get_range\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -3470,19 +4505,19 @@ "span": 3, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, 
sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"upload\"}[$__rate_interval]))", + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"upload\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -3549,19 +4584,19 @@ "span": 3, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, 
sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"delete\"}[$__rate_interval]))", + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"querier\",operation=\"delete\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -3600,7 +4635,9 @@ "schemaVersion": 14, "style": "dark", "tags": [ - "mimir" + "owner:team-atlas", + "topic:observability", + "component:mimir" ], "templating": { "list": [ @@ -3632,7 +4669,7 @@ "multi": true, "name": "cluster", "options": [], - "query": "label_values(cortex_build_info, cluster)", + "query": "label_values(cortex_build_info, cluster_id)", "refresh": 1, "regex": "", "sort": 1, @@ -3656,7 +4693,7 @@ "multi": true, "name": "namespace", "options": [], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "query": "label_values(cortex_build_info{cluster_id=~\"$cluster\"}, namespace)", "refresh": 1, "regex": "", "sort": 
1, @@ -3699,6 +4736,6 @@ }, "timezone": "utc", "title": "Mimir / Reads", - "uid": "e327503188913dc38ad571c647eef643", + "uid": "mimir-e327503188913dc38ad571c647eef643", "version": 0 } diff --git a/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-remote-ruler-reads-networking.json b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-remote-ruler-reads-networking.json new file mode 100644 index 00000000..d85bd515 --- /dev/null +++ b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-remote-ruler-reads-networking.json @@ -0,0 +1,1052 @@ +{ + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "owner:team-atlas", + "topic:observability", + "component:mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "id": 1, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster_id=~\"$cluster\", 
namespace=~\"$namespace\",pod=~\"(.*mimir-)?(ruler-query-frontend|ruler-query-scheduler|ruler-querier).*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Receive bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "id": 2, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(ruler-query-frontend|ruler-query-scheduler|ruler-querier).*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Transmit bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 3, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(cortex_inflight_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(ruler-query-frontend|ruler-query-scheduler|ruler-querier).*\"})", + "format": 
"time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(cortex_inflight_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(ruler-query-frontend|ruler-query-scheduler|ruler-querier).*\"})", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + } + ], + "title": "Inflight requests (per pod)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 4, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(ruler-query-frontend|ruler-query-scheduler|ruler-querier).*\"}))", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(ruler-query-frontend|ruler-query-scheduler|ruler-querier).*\"}))", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + }, + { + "expr": "min(cortex_tcp_connections_limit{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(ruler-query-frontend|ruler-query-scheduler|ruler-querier).*\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "TCP connections (per pod)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Summary", + 
"titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "id": 5, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler-query-frontend.*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Receive bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "id": 6, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler-query-frontend.*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Transmit bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + 
"drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 7, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(cortex_inflight_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler-query-frontend.*\"})", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(cortex_inflight_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler-query-frontend.*\"})", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + } + ], + "title": "Inflight requests (per pod)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 8, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler-query-frontend.*\"}))", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler-query-frontend.*\"}))", + "format": "time_series", + 
"legendFormat": "highest", + "legendLink": null + }, + { + "expr": "min(cortex_tcp_connections_limit{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler-query-frontend.*\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "TCP connections (per pod)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ruler-query-frontend", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "id": 9, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler-query-scheduler.*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Receive bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "id": 10, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 
3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler-query-scheduler.*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Transmit bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 11, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(cortex_inflight_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler-query-scheduler.*\"})", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(cortex_inflight_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler-query-scheduler.*\"})", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + } + ], + "title": "Inflight requests (per pod)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 12, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + 
"mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler-query-scheduler.*\"}))", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler-query-scheduler.*\"}))", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + }, + { + "expr": "min(cortex_tcp_connections_limit{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler-query-scheduler.*\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "TCP connections (per pod)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ruler-query-scheduler", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "id": 13, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler-querier.*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Receive bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + 
"defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "id": 14, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler-querier.*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Transmit bandwidth", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { + "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 15, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(cortex_inflight_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler-querier.*\"})", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(cortex_inflight_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler-querier.*\"})", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + } + ], + "title": "Inflight requests (per pod)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "custom": { 
+ "fillOpacity": 0 + }, + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 16, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler-querier.*\"}))", + "format": "time_series", + "legendFormat": "avg", + "legendLink": null + }, + { + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler-querier.*\"}))", + "format": "time_series", + "legendFormat": "highest", + "legendLink": null + }, + { + "expr": "min(cortex_tcp_connections_limit{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ruler-querier.*\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "TCP connections (per pod)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ruler-querier", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "owner:team-atlas", + "topic:observability", + "component:mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".*", + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": false, + 
"name": "cluster", + "options": [], + "query": "label_values(cortex_build_info, cluster_id)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [], + "query": "label_values(cortex_build_info{cluster_id=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Remote ruler reads networking", + "uid": "mimir-9e8cfff65f91632f8a25981c6fe44bc9", + "version": 0 +} diff --git a/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-remote-ruler-reads-resources.json b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-remote-ruler-reads-resources.json new file mode 100644 index 00000000..61ae07c0 --- /dev/null +++ b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-remote-ruler-reads-resources.json @@ -0,0 +1,986 @@ +{ + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "owner:team-atlas", + "topic:observability", + "component:mimir" + ], + 
"targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 1, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\"} / container_spec_cpu_period{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": 
"min(kube_pod_container_resource_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\",resource=\"cpu\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "CPU", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 2, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster_id=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"ruler-query-frontend\",resource=\"memory\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "id": 3, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-frontend\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ruler-query-frontend", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": 
{ + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 4, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"} / container_spec_cpu_period{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\",resource=\"cpu\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "CPU", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": 
{ + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 5, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-query-scheduler\",resource=\"memory\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "id": 6, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster_id=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"ruler-query-scheduler\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ruler-query-scheduler", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 7, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"} / 
container_spec_cpu_period{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\",resource=\"cpu\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "CPU", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 8, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster_id=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"ruler-querier\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\",resource=\"memory\"})", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "id": 9, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ruler-querier\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ruler-querier", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "owner:team-atlas", + "topic:observability", + "component:mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".*", + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": 
"cluster", + "multi": false, + "name": "cluster", + "options": [], + "query": "label_values(cortex_build_info, cluster_id)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [], + "query": "label_values(cortex_build_info{cluster_id=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Remote ruler reads resources", + "uid": "mimir-1940f6ef765a506a171faa2056c956c3", + "version": 0 +} diff --git a/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-remote-ruler-reads.json b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-remote-ruler-reads.json new file mode 100644 index 00000000..1e85b0fa --- /dev/null +++ b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-remote-ruler-reads.json @@ -0,0 +1,1687 @@ +{ + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "owner:team-atlas", + "topic:observability", + "component:mimir" + ], + 
"targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "175px", + "panels": [ + { + "content": "

\n This dashboard shows health metrics for the ruler read path when remote operational mode is enabled.\n It is broken into sections for each service on the ruler read path, and organized by the order in which the read request flows.\n
\n For each service, there are three panels showing (1) requests per second to that service, (2) average, median, and p99 latency of requests to that service, and (3) p99 latency of requests to each instance of that service.\n

\n", + "datasource": null, + "description": "", + "id": 1, + "mode": "markdown", + "span": 12, + "title": "", + "transparent": true, + "type": "text" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Remote ruler reads dashboard description", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "100px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "### Evaluations per second\nRate of rule expressions evaluated per second.\n\n", + "fill": 1, + "format": "reqps", + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(\n rate(\n cortex_request_duration_seconds_count{\n cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\",\n route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"\n }[$__rate_interval]\n )\n)\n", + "format": "time_series", + "instant": true, + "refId": "A" + } + ], + "thresholds": "70,80", + "timeFrom": null, + "timeShift": null, + "title": "Evaluations / sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "singlestat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": 
false, + "title": "Headlines", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": 
"color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 3, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "id": 4, + "links": [], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_id_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"})) * 1e3", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_id_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"})) * 1e3", + "format": "time_series", + 
"legendFormat": "50th percentile", + "refId": "B" + }, + { + "expr": "1e3 * sum(cluster_id_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"}) / sum(cluster_id_job_route:cortex_request_duration_seconds_count:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"})", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "s" + }, + "overrides": [] + }, + "id": 5, + "links": [], + "options": { + "legend": { + "displayMode": "hidden", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", route=~\"/httpgrpc.HTTP/Handle|.*api_v1_query\"}[$__rate_interval])))", + "format": "time_series", + "legendFormat": "", + "legendLink": null + } + ], + "title": "Per pod p99 latency", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ruler-query-frontend", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Requests / sec\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 6, + "links": [], + "options": { + "legend": 
{ + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Latency (Time in Queue)\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "id": 7, + "links": [], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency (Time in Queue)", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "description": "### Queue length\n

\n The query scheduler is an optional service that moves\n the internal queue from the query-frontend into a\n separate component.\n If this service is not deployed,\n these panels will show \"No data.\"\n

\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "queries" + }, + "overrides": [] + }, + "id": 8, + "links": [], + "options": { + "legend": { + "displayMode": "hidden", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "exemplar": true, + "expr": "sum(min_over_time(cortex_query_scheduler_queue_length{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__interval]))", + "format": "time_series", + "legendFormat": "Queue length", + "legendLink": null + } + ], + "title": "Queue length", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ruler-query-scheduler", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### 99th Percentile Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "noValue": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "id": 9, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "label_replace(histogram_quantile(0.99, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (le, additional_queue_dimensions)) * 1e3, \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", + "format": "time_series", + "legendFormat": "99th Percentile: {{ additional_queue_dimensions }}", + "refId": "A" + } + ], + "title": "99th Percentile Latency by Queue Dimension", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### 50th Percentile Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "noValue": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "id": 10, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "label_replace(histogram_quantile(0.50, sum(rate(cortex_query_scheduler_queue_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (le, additional_queue_dimensions)) * 1e3, \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", + "format": "time_series", + "legendFormat": "50th Percentile: {{ additional_queue_dimensions }}", + "refId": "A" + } + ], + "title": "50th Percentile Latency by Queue Dimension", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Average Latency by Queue Dimension\n

\n The query scheduler can optionally create subqueues\n in order to enforce round-robin query queuing fairness\n across additional queue dimensions beyond the default.\n\n By default, query queuing fairness is only applied by tenant ID.\n Queries without additional queue dimensions are labeled 'none'.\n

\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "noValue": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "id": 11, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "label_replace(sum(rate(cortex_query_scheduler_queue_duration_seconds_sum{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (additional_queue_dimensions) * 1e3 / sum(rate(cortex_query_scheduler_queue_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\"}[$__rate_interval])) by (additional_queue_dimensions), \"additional_queue_dimensions\", \"none\", \"additional_queue_dimensions\", \"^$\")\n", + "format": "time_series", + "legendFormat": "Average: {{ additional_queue_dimensions }}", + "refId": "C" + } + ], + "title": "Average Latency by Queue Dimension", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ruler-query-scheduler Latency (Time in Queue) Breakout by Additional Queue Dimensions", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", 
+ "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 12, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_querier_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", 
\"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "id": 13, + "links": [], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_id_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", + "format": "time_series", + "legendFormat": "99th percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum by (le) (cluster_id_job_route:cortex_querier_request_duration_seconds_bucket:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})) * 1e3", + "format": "time_series", + "legendFormat": "50th percentile", + "refId": "B" + }, + { + "expr": "1e3 * sum(cluster_id_job_route:cortex_querier_request_duration_seconds_sum:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}) / sum(cluster_id_job_route:cortex_querier_request_duration_seconds_count:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"})", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency", + "type": "timeseries" + }, + { + 
"datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "s" + }, + "overrides": [] + }, + "id": 14, + "links": [], + "options": { + "legend": { + "displayMode": "hidden", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "exemplar": true, + "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_querier_request_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler-querier.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval])))", + "format": "time_series", + "legendFormat": "", + "legendLink": null + } + ], + "title": "Per pod p99 latency", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ruler-querier", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Replicas\nThe maximum and current number of ruler-querier replicas.\nNote: The current number of replicas can still show 1 replica even when scaled to 0.\nBecause HPA never reports 0 replicas, the query will report 0 only if the HPA is not active.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/Max .+/" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": 
"custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/Current .+/" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/Min .+/" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 15, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "max by (scaletargetref_name) (\n kube_horizontalpodautoscaler_spec_max_replicas{cluster_id=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-ruler-querier\"}\n # Add the scaletargetref_name label for readability\n + on (cluster_id, namespace, horizontalpodautoscaler) group_left (scaletargetref_name)\n 0*kube_horizontalpodautoscaler_info{cluster_id=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-ruler-querier\"}\n)\n", + "format": "time_series", + "legendFormat": "Max {{ scaletargetref_name }}", + "legendLink": null + }, + { + "expr": "max by (scaletargetref_name) (\n kube_horizontalpodautoscaler_status_current_replicas{cluster_id=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-ruler-querier\"}\n # HPA doesn't go to 0 replicas, so we multiply by 0 if the HPA is not active\n * on (cluster_id, namespace, horizontalpodautoscaler)\n kube_horizontalpodautoscaler_status_condition{cluster_id=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-ruler-querier\", condition=\"ScalingActive\", status=\"true\"}\n # Add the scaletargetref_name label for readability\n + on (cluster_id, namespace, horizontalpodautoscaler) group_left (scaletargetref_name)\n 0*kube_horizontalpodautoscaler_info{cluster_id=~\"$cluster\", namespace=~\"$namespace\", 
horizontalpodautoscaler=~\"keda-hpa-ruler-querier\"}\n)\n", + "format": "time_series", + "legendFormat": "Current {{ scaletargetref_name }}", + "legendLink": null + }, + { + "expr": "max by (scaletargetref_name) (\n kube_horizontalpodautoscaler_spec_min_replicas{cluster_id=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-ruler-querier\"}\n # Add the scaletargetref_name label for readability\n + on (cluster_id, namespace, horizontalpodautoscaler) group_left (scaletargetref_name)\n 0*kube_horizontalpodautoscaler_info{cluster_id=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-ruler-querier\"}\n)\n", + "format": "time_series", + "legendFormat": "Min {{ scaletargetref_name }}", + "legendLink": null + } + ], + "title": "Replicas", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Autoscaler failures rate\nThe rate of failures in the KEDA custom metrics API server. Whenever an error occurs, the KEDA custom\nmetrics server is unable to query the scaling metric from Prometheus so the autoscaler wouldn't work properly.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 16, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by(cluster_id, namespace, scaler, metric, scaledObject) (\n label_replace(\n rate(keda_scaler_errors[$__rate_interval]),\n \"namespace\", \"$1\", \"exported_namespace\", \"(.+)\"\n )\n) +\non(cluster_id, namespace, metric, scaledObject) group_left\nlabel_replace(\n label_replace(\n
kube_horizontalpodautoscaler_spec_target_metric{cluster_id=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-ruler-querier\"} * 0,\n \"scaledObject\", \"$1\", \"horizontalpodautoscaler\", \"keda-hpa-(.*)\"\n ),\n \"metric\", \"$1\", \"metric_name\", \"(.+)\"\n)\n", + "format": "time_series", + "legendFormat": "{{scaler}} failures", + "legendLink": null + } + ], + "title": "Autoscaler failures rate", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ruler-querier - autoscaling", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Scaling metric (CPU): Desired replicas\nThis panel shows the scaling metric exposed by KEDA divided by the target/threshold used.\nIt should represent the desired number of replicas, ignoring the min/max constraints applied later.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 17, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by (scaler) (\n label_replace(\n keda_scaler_metrics_value{cluster_id=~\"$cluster\", exported_namespace=~\"$namespace\", scaler=~\".*cpu.*\"},\n \"namespace\", \"$1\", \"exported_namespace\", \"(.*)\"\n )\n /\n on(cluster_id, namespace, scaledObject, metric) group_left label_replace(\n label_replace(\n kube_horizontalpodautoscaler_spec_target_metric{cluster_id=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-ruler-querier\"},\n \"metric\", \"$1\", 
\"metric_name\", \"(.+)\"\n ),\n \"scaledObject\", \"$1\", \"horizontalpodautoscaler\", \"keda-hpa-(.*)\"\n )\n)\n", + "format": "time_series", + "legendFormat": "{{ scaler }}", + "legendLink": null + } + ], + "title": "Scaling metric (CPU): Desired replicas", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Scaling metric (memory): Desired replicas\nThis panel shows the scaling metric exposed by KEDA divided by the target/threshold used.\nIt should represent the desired number of replicas, ignoring the min/max constraints applied later.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 18, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by (scaler) (\n label_replace(\n keda_scaler_metrics_value{cluster_id=~\"$cluster\", exported_namespace=~\"$namespace\", scaler=~\".*memory.*\"},\n \"namespace\", \"$1\", \"exported_namespace\", \"(.*)\"\n )\n /\n on(cluster_id, namespace, scaledObject, metric) group_left label_replace(\n label_replace(\n kube_horizontalpodautoscaler_spec_target_metric{cluster_id=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-ruler-querier\"},\n \"metric\", \"$1\", \"metric_name\", \"(.+)\"\n ),\n \"scaledObject\", \"$1\", \"horizontalpodautoscaler\", \"keda-hpa-(.*)\"\n )\n)\n", + "format": "time_series", + "legendFormat": "{{ scaler }}", + "legendLink": null + } + ], + "title": "Scaling metric (memory): Desired replicas", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Scaling metric (in-flight queries): 
Desired replicas\nThis panel shows the scaling metric exposed by KEDA divided by the target/threshold used.\nIt should represent the desired number of replicas, ignoring the min/max constraints applied later.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 19, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by (scaler) (\n label_replace(\n keda_scaler_metrics_value{cluster_id=~\"$cluster\", exported_namespace=~\"$namespace\", scaler=~\".*queries.*\"},\n \"namespace\", \"$1\", \"exported_namespace\", \"(.*)\"\n )\n /\n on(cluster_id, namespace, scaledObject, metric) group_left label_replace(\n label_replace(\n kube_horizontalpodautoscaler_spec_target_metric{cluster_id=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-ruler-querier\"},\n \"metric\", \"$1\", \"metric_name\", \"(.+)\"\n ),\n \"scaledObject\", \"$1\", \"horizontalpodautoscaler\", \"keda-hpa-(.*)\"\n )\n)\n", + "format": "time_series", + "legendFormat": "{{ scaler }}", + "legendLink": null + } + ], + "title": "Scaling metric (in-flight queries): Desired replicas", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "owner:team-atlas", + "topic:observability", + "component:mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [], + "query": 
"prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [], + "query": "label_values(cortex_build_info, cluster_id)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "namespace", + "multi": true, + "name": "namespace", + "options": [], + "query": "label_values(cortex_build_info{cluster_id=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Remote ruler reads", + "uid": "mimir-f103238f7f5ab2f1345ce650cbfbfe2f", + "version": 0 +} diff --git a/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-rollout-progress.json b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-rollout-progress.json new file mode 100644 index 00000000..9d858440 --- /dev/null +++ b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-rollout-progress.json @@ -0,0 +1,1408 @@ +{ + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [] + }, + "editable": true, + 
"gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "owner:team-atlas", + "topic:observability", + "component:mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "fillOpacity": 80, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineWidth": 1, + "scaleDistribution": { + "type": "linear" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Ready" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Updated" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 13, + "w": 10, + "x": 0, + "y": 0 + }, + "id": 1, + "links": [], + "options": { + "barRadius": 0, + "barWidth": 0.97, + "fullHighlight": false, + "groupWidth": 0.7, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "orientation": "horizontal", + "showValue": "auto", + "stacking": "none", + "tooltip": { + "mode": "multi", + "sort": "none" + }, + "xField": "Workload", + "xTickLabelRotation": 0, + "xTickLabelSpacing": 0 + }, + "targets": [ + { + "expr": "(\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas_updated{cluster_id=~\"$cluster\", 
namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas_updated{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n /\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n) and (\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n > 0\n)\n", + "format": "table", + "instant": true, + "intervalFactor": null, + "legendFormat": "__auto", + "legendLink": null, + "step": null + }, + { + "expr": "(\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas_ready{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas_ready{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n /\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}\n , 
\"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n) and (\n sum by (workload) (\n label_replace(label_replace(label_replace(\n kube_deployment_status_replicas{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}\n or\n kube_statefulset_status_replicas{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}\n , \"workload\", \"$1\", \"deployment\", \"(.+)\"), \"workload\", \"$1\", \"statefulset\", \"(.+)\"), \"workload\", \"$1\", \"workload\", \"(.*?)(?:-zone-[a-z])?\")\n )\n > 0\n)\n", + "format": "table", + "instant": true, + "intervalFactor": null, + "legendFormat": "__auto", + "legendLink": null, + "step": null + } + ], + "title": "Rollout progress", + "transformations": [ + { + "id": "joinByField", + "options": { + "byField": "workload", + "mode": "outer" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time 1": true, + "Time 2": true + }, + "renameByName": { + "Value #A": "Updated", + "Value #B": "Ready", + "workload": "Workload" + } + } + }, + { + "id": "sortBy", + "options": { + "sort": [ + { + "field": "Workload" + } + ] + } + } + ], + "type": "barchart" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "noValue": "", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "fill": 1, + "gridPos": { + "h": 4, + "w": 2, + "x": 10, + "y": 0 + }, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + 
"seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"2.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Writes - 2xx", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "noValue": "", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 0.2 + }, + { + "color": "red", + "value": 0.5 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "fill": 1, + "gridPos": { + "h": 4, + "w": 2, + "x": 12, + "y": 0 + }, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + 
"nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"4.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Writes - 4xx", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "noValue": "", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 0.01 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "fill": 1, + "gridPos": { + "h": 4, + "w": 2, + "x": 14, + "y": 0 + }, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + 
"values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\",status_code=~\"5.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]))\n", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Writes - 5xx", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "noValue": "", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 0.2 + }, + { + "color": "red", + "value": 0.5 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "fill": 1, + "gridPos": { + "h": 4, + "w": 8, + "x": 16, + "y": 0 + }, + "id": 5, + "legend": { + 
"avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_id_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}))\n", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Writes 99th latency", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "noValue": "", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "fill": 1, + "gridPos": { + "h": 4, + "w": 2, + "x": 10, + "y": 4 + }, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + 
"links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\",status_code=~\"2.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]))\n", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Reads - 2xx", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "noValue": "", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 0.01 + }, + { + "color": "red", + "value": 0.05 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "fill": 1, + "gridPos": { + "h": 4, + "w": 2, + "x": 12, + "y": 4 + }, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + 
"min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\",status_code=~\"4.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]))\n", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Reads - 4xx", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "noValue": "", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 0.01 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "fill": 1, + "gridPos": { + "h": 4, + "w": 2, + "x": 14, + "y": 4 + }, + "id": 8, + 
"legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(cortex_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\",status_code=~\"5.+\"}[$__rate_interval])) /\nsum(rate(cortex_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}[$__rate_interval]))\n", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Reads - 5xx", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "noValue": "", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 1 + }, + { + "color": "red", + "value": 2.5 + } + ] + }, + "unit": "s" + }, + "overrides": 
[] + }, + "fill": 1, + "gridPos": { + "h": 4, + "w": 8, + "x": 16, + "y": 4 + }, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum by (le) (cluster_id_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}))\n", + "format": null, + "instant": false, + "interval": "", + "intervalFactor": null, + "legendFormat": "", + "legendLink": null, + "step": null + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Reads 99th latency", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "noValue": "All healthy", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 1 + }, + { + "color": "red", + "value": 2 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "fill": 1, + "gridPos": { + "h": 3, + "w": 10, + "x": 0, + "y": 
13 + }, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "text": { + "titleSize": 14, + "valueSize": 14 + }, + "textMode": "value_and_name" + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "kube_deployment_status_replicas_unavailable{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}\n> 0\n", + "format": null, + "instant": true, + "interval": "", + "intervalFactor": null, + "legendFormat": "{{deployment}}", + "legendLink": null, + "step": null + }, + { + "expr": "kube_statefulset_status_replicas_current{cluster_id=~\"$cluster\", namespace=~\"$namespace\"} -\nkube_statefulset_status_replicas_ready {cluster_id=~\"$cluster\", namespace=~\"$namespace\"}\n> 0\n", + "format": null, + "instant": true, + "interval": "", + "intervalFactor": null, + "legendFormat": "{{statefulset}}", + "legendLink": null, + "step": null + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Unhealthy pods", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "stat", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "datasource": "$datasource", + "fieldConfig": { + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "r.*" + }, + "properties": [ + { + "id": "custom.align", + "value": "center" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 10, 
+ "y": 8 + }, + "id": 11, + "targets": [ + { + "expr": "count by(container, version) (\n label_replace(\n kube_pod_container_info{cluster_id=~\"$cluster\", namespace=~\"$namespace\"},\n \"version\", \"$1\", \"image\", \".*:(.*)\"\n )\n)\n", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Pods count per version", + "transformations": [ + { + "id": "labelsToFields", + "options": { + "valueLabel": "version" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "indexByName": { + "Time": 0, + "container": 1 + } + } + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "field": "container" + } + ] + } + } + ], + "type": "table" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10 + }, + "unit": "percentunit" + } + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 8 + }, + "id": 12, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "expr": "1 - (\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_id_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"} offset 24h))[1h:])\n /\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_id_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"api_(v1|prom)_push|otlp_v1_metrics\"}))[1h:])\n)\n", + "format": "time_series", + "legendFormat": "writes", + "legendLink": null + }, + { + "expr": "1 - (\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_id_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", 
route=~\"(prometheus|api_prom)_api_v1_.+\"} offset 24h))[1h:])\n /\n avg_over_time(histogram_quantile(0.99, sum by (le) (cluster_id_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", route=~\"(prometheus|api_prom)_api_v1_.+\"}))[1h:])\n)\n", + "format": "time_series", + "legendFormat": "reads", + "legendLink": null + } + ], + "title": "Latency vs 24h ago", + "type": "timeseries" + } + ], + "refresh": "10s", + "rows": null, + "schemaVersion": 27, + "style": "dark", + "tags": [ + "owner:team-atlas", + "topic:observability", + "component:mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".*", + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [], + "query": "label_values(cortex_build_info, cluster_id)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [], + "query": "label_values(cortex_build_info{cluster_id=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + 
"time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Rollout progress", + "uid": "mimir-7f0b5567d543a1698e695b530eb7f5de", + "version": 0 +} diff --git a/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-ruler.json b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-ruler.json index 0ff20131..de276d01 100644 --- a/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-ruler.json +++ b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-ruler.json @@ -21,7 +21,9 @@ "includeVars": true, "keepTime": true, "tags": [ - "mimir" + "owner:team-atlas", + "topic:observability", + "component:mimir" ], "targetBlank": false, "title": "Mimir dashboards", @@ -67,7 +69,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(cortex_ruler_managers_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"})", + "expr": "sum(cortex_ruler_managers_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"})", "format": "time_series", "instant": true, "refId": "A" @@ -142,7 +144,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"})", + "expr": "sum(cortex_prometheus_rule_group_rules{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"})", "format": "time_series", "instant": true, "refId": "A" @@ -218,7 +220,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval]))", + "expr": 
"sum(rate(cortex_ingester_client_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval]))", "format": "time_series", "instant": true, "refId": "A" @@ -293,7 +295,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_ingester_client_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval]))", "format": "time_series", "instant": true, "refId": "A" @@ -417,19 +419,19 @@ "span": 6, "targets": [ { - "expr": "sum(rate(cortex_prometheus_rule_evaluations_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", + "expr": "sum(rate(cortex_prometheus_rule_evaluations_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n-\nsum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", "format": "time_series", "legendFormat": "success", "legendLink": null }, { - "expr": "sum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_prometheus_rule_evaluation_failures_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", 
"format": "time_series", "legendFormat": "failed", "legendLink": null }, { - "expr": "sum(rate(cortex_prometheus_rule_group_iterations_missed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_prometheus_rule_group_iterations_missed_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "missed", "legendLink": null @@ -477,7 +479,7 @@ "span": 6, "targets": [ { - "expr": "sum (rate(cortex_prometheus_rule_evaluation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum (rate(cortex_prometheus_rule_evaluation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", + "expr": "sum (rate(cortex_prometheus_rule_evaluation_duration_seconds_sum{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum (rate(cortex_prometheus_rule_evaluation_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", "format": "time_series", "legendFormat": "average", "legendLink": null @@ -673,7 +675,7 @@ "span": 6, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_ingester_client_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", 
operation=\"/cortex.Ingester/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "legendFormat": "{{status}}", "refId": "A" @@ -722,19 +724,19 @@ "span": 6, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(cortex_ingester_client_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(cortex_ingester_client_request_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(cortex_ingester_client_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(cortex_ingester_client_request_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(cortex_ingester_client_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval]))", + "expr": 
"sum(rate(cortex_ingester_client_request_duration_seconds_sum{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_ingester_client_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", operation=\"/cortex.Ingester/Push\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -948,7 +950,7 @@ "span": 6, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_ingester_client_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "legendFormat": "{{status}}", "refId": "A" @@ -997,19 +999,19 @@ "span": 6, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(cortex_ingester_client_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(cortex_ingester_client_request_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", 
operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(cortex_ingester_client_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(cortex_ingester_client_request_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(cortex_ingester_client_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_ingester_client_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_ingester_client_request_duration_seconds_sum{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_ingester_client_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*|ruler-querier.*))\", operation=\"/cortex.Ingester/QueryStream\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -1223,7 +1225,7 @@ "span": 6, "targets": [ { - "expr": "sum by (status) (\n 
label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "legendFormat": "{{status}}", "refId": "A" @@ -1272,19 +1274,19 @@ "span": 6, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", 
job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", kv_name=~\"ruler\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -1363,19 +1365,19 @@ "span": 4, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(cortex_querier_storegateway_instances_hit_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1", + "expr": "histogram_quantile(0.99, sum(rate(cortex_querier_storegateway_instances_hit_per_query_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(cortex_querier_storegateway_instances_hit_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1", + "expr": "histogram_quantile(0.50, sum(rate(cortex_querier_storegateway_instances_hit_per_query_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(cortex_querier_storegateway_instances_hit_per_query_sum{cluster=~\"$cluster\", 
job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_querier_storegateway_instances_hit_per_query_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_querier_storegateway_instances_hit_per_query_sum{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_querier_storegateway_instances_hit_per_query_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -1442,19 +1444,19 @@ "span": 4, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(cortex_querier_storegateway_refetches_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1", + "expr": "histogram_quantile(0.99, sum(rate(cortex_querier_storegateway_refetches_per_query_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(cortex_querier_storegateway_refetches_per_query_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1", + "expr": "histogram_quantile(0.50, sum(rate(cortex_querier_storegateway_refetches_per_query_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(cortex_querier_storegateway_refetches_per_query_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1 / 
sum(rate(cortex_querier_storegateway_refetches_per_query_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_querier_storegateway_refetches_per_query_sum{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1 / sum(rate(cortex_querier_storegateway_refetches_per_query_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -1538,7 +1540,7 @@ "span": 4, "targets": [ { - "expr": "sum(rate(cortex_querier_blocks_consistency_checks_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) / sum(rate(cortex_querier_blocks_consistency_checks_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_querier_blocks_consistency_checks_failed_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) / sum(rate(cortex_querier_blocks_consistency_checks_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Failures / sec", "legendLink": null @@ -1599,7 +1601,7 @@ "span": 4, "targets": [ { - "expr": "sum by(user) (rate(cortex_prometheus_notifications_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum by(user) (rate(cortex_prometheus_notifications_sent_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]) > 0)\n> 0\n", + "expr": "sum by(user) (rate(cortex_prometheus_notifications_errors_total{cluster_id=~\"$cluster\", 
job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum by(user) (rate(cortex_prometheus_notifications_sent_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]) > 0)\n> 0\n", "format": "time_series", "legendFormat": "{{ user }}", "legendLink": null @@ -1648,7 +1650,7 @@ "span": 4, "targets": [ { - "expr": "sum by(user) (rate(cortex_prometheus_notifications_queue_length{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum by(user) (rate(cortex_prometheus_notifications_queue_capacity{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) > 0\n", + "expr": "sum by(user) (rate(cortex_prometheus_notifications_queue_length{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum by(user) (rate(cortex_prometheus_notifications_queue_capacity{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) > 0\n", "format": "time_series", "legendFormat": "{{ user }}", "legendLink": null @@ -1697,7 +1699,7 @@ "span": 4, "targets": [ { - "expr": "sum by (user) (increase(cortex_prometheus_notifications_dropped_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) > 0\n", + "expr": "sum by (user) (increase(cortex_prometheus_notifications_dropped_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) > 0\n", "format": "time_series", "legendFormat": "{{ user }}", "legendLink": null @@ -1757,7 +1759,7 @@ "span": 4, "targets": [ { - "expr": "sum by(user) (rate(cortex_prometheus_rule_group_iterations_missed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) > 0", + "expr": "sum by(user) 
(rate(cortex_prometheus_rule_group_iterations_missed_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) > 0", "format": "time_series", "legendFormat": "{{ user }}", "legendLink": null @@ -1805,7 +1807,7 @@ "span": 4, "targets": [ { - "expr": "rate(cortex_prometheus_rule_group_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])\n /\nrate(cortex_prometheus_rule_group_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])\n", + "expr": "rate(cortex_prometheus_rule_group_duration_seconds_sum{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])\n /\nrate(cortex_prometheus_rule_group_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])\n", "format": "time_series", "legendFormat": "{{ user }}", "legendLink": null @@ -1853,7 +1855,7 @@ "span": 4, "targets": [ { - "expr": "sum by(rule_group) (rate(cortex_prometheus_rule_evaluation_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) > 0", + "expr": "sum by(rule_group) (rate(cortex_prometheus_rule_evaluation_failures_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) > 0", "format": "time_series", "legendFormat": "{{ rule_group }}", "legendLink": null @@ -1913,7 +1915,7 @@ "span": 12, "targets": [ { - "expr": "sum by(user) (rate(cortex_prometheus_rule_evaluation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum by(user) (rate(cortex_prometheus_rule_evaluation_duration_seconds_count{cluster=~\"$cluster\", 
job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", + "expr": "sum by(user) (rate(cortex_prometheus_rule_evaluation_duration_seconds_sum{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n /\nsum by(user) (rate(cortex_prometheus_rule_evaluation_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))\n", "format": "time_series", "legendFormat": "{{ user }}", "legendLink": null @@ -1973,7 +1975,7 @@ "span": 3, "targets": [ { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\"}[$__rate_interval]))", + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{operation}}", "legendLink": null @@ -2006,7 +2008,7 @@ "span": 3, "targets": [ { - "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\"}[$__rate_interval])) >= 0", + "expr": "sum by(operation) (rate(thanos_objstore_bucket_operation_failures_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\"}[$__rate_interval])) / sum by(operation) (rate(thanos_objstore_bucket_operations_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\"}[$__rate_interval])) >= 0", "format": "time_series", "legendFormat": "{{operation}}", "legendLink": null @@ -2055,19 +2057,19 @@ "span": 3, "targets": [ { - "expr": "histogram_quantile(0.99, 
sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"attributes\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"attributes\"}[$__rate_interval]))", + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"attributes\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"attributes\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -2134,19 +2136,19 @@ "span": 3, 
"targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"exists\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"exists\"}[$__rate_interval]))", + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"exists\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"exists\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -2225,19 +2227,19 @@ 
"span": 3, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get\"}[$__rate_interval]))", + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -2304,19 +2306,19 @@ "span": 3, 
"targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get_range\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get_range\"}[$__rate_interval]))", + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get_range\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"get_range\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", "refId": "C" 
@@ -2383,19 +2385,19 @@ "span": 3, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"upload\"}[$__rate_interval]))", + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"upload\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", 
"refId": "C" @@ -2462,19 +2464,19 @@ "span": 3, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"delete\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"delete\"}[$__rate_interval]))", + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"delete\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster_id=~\"$cluster\", namespace=~\"$namespace\",component=\"ruler-storage\",operation=\"delete\"}[$__rate_interval]))", "format": "time_series", "legendFormat": 
"Average", "refId": "C" @@ -2513,7 +2515,9 @@ "schemaVersion": 14, "style": "dark", "tags": [ - "mimir" + "owner:team-atlas", + "topic:observability", + "component:mimir" ], "templating": { "list": [ @@ -2545,7 +2549,7 @@ "multi": true, "name": "cluster", "options": [], - "query": "label_values(cortex_build_info, cluster)", + "query": "label_values(cortex_build_info, cluster_id)", "refresh": 1, "regex": "", "sort": 1, @@ -2569,7 +2573,7 @@ "multi": true, "name": "namespace", "options": [], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "query": "label_values(cortex_build_info{cluster_id=~\"$cluster\"}, namespace)", "refresh": 1, "regex": "", "sort": 1, @@ -2612,6 +2616,6 @@ }, "timezone": "utc", "title": "Mimir / Ruler", - "uid": "631e15d5d85afb2ca8e35d62984eeaa0", + "uid": "mimir-631e15d5d85afb2ca8e35d62984eeaa0", "version": 0 } diff --git a/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-scaling.json b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-scaling.json new file mode 100644 index 00000000..5e0e69e5 --- /dev/null +++ b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-scaling.json @@ -0,0 +1,365 @@ +{ + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "owner:team-atlas", + "topic:observability", + "component:mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "200px", + "panels": [ + { + "id": 1, + "options": { + "content": "This dashboard identifies scaling-related issues by suggesting services that you might want to scale 
up.\nThe table that follows contains a suggested number of replicas and the reason why.\nIf the system is failing and depending on the reason, try scaling up to the specified number.\nThe specified numbers are intended as helpful guidelines when things go wrong, rather than prescriptive guidelines.\n\nReasons:\n- **sample_rate**: There are not enough replicas to handle the\n sample rate. Applies to distributor and ingesters.\n- **active_series**: There are not enough replicas\n to handle the number of active series. Applies to ingesters.\n- **cpu_usage**: There are not enough replicas\n based on the CPU usage of the jobs vs the resource requests.\n Applies to all jobs.\n- **memory_usage**: There are not enough replicas based on the memory\n usage vs the resource requests. Applies to all jobs.\n- **active_series_limits**: There are not enough replicas to hold 60% of the\n sum of all the per tenant series limits.\n- **sample_rate_limits**: There are not enough replicas to handle 60% of the\n sum of all the per tenant rate limits.\n", + "mode": "markdown" + }, + "span": 12, + "title": "", + "type": "text" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Service scaling", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "400px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "sort": { + "col": 0, + "desc": false + }, + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", 
+ "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Required Replicas", + "colorMode": null, + "colors": [], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "Cluster", + "colorMode": null, + "colors": [], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "__name__", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "Cluster", + "colorMode": null, + "colors": [], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "cluster", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "Service", + "colorMode": null, + "colors": [], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "deployment", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "Namespace", + "colorMode": null, + "colors": [], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "namespace", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "Reason", + "colorMode": null, + "colors": [], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "reason", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + 
"thresholds": [], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "sort_desc(\n cluster_id_namespace_deployment_reason:required_replicas:count{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}\n > ignoring(reason) group_left\n cluster_id_namespace_deployment:actual_replicas:count{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}\n)\n", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Workload-based scaling", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Scaling", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "owner:team-atlas", + "topic:observability", + "component:mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [], + "query": "label_values(cortex_build_info, cluster_id)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + 
"allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "namespace", + "multi": true, + "name": "namespace", + "options": [], + "query": "label_values(cortex_build_info{cluster_id=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Scaling", + "uid": "mimir-64bbad83507b7289b514725658e10352", + "version": 0 +} diff --git a/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-slow-queries.json b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-slow-queries.json new file mode 100644 index 00000000..f3e343d8 --- /dev/null +++ b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-slow-queries.json @@ -0,0 +1,1467 @@ +{ + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "owner:team-atlas", + "topic:observability", + "component:mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + 
"fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "s" + }, + "overrides": [] + }, + "id": 1, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "quantile_over_time(0.99, {cluster_id=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by ()", + "format": "time_series", + "legendFormat": "p99", + "legendLink": null + }, + { + "expr": "quantile_over_time(0.5, {cluster_id=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by ()", + "format": "time_series", + "legendFormat": "p50", + "legendLink": null + } + ], + "title": "Response time", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 2, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "quantile_over_time(0.99, {cluster_id=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != 
\"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by ()", + "format": "time_series", + "legendFormat": "p99", + "legendLink": null + }, + { + "expr": "quantile_over_time(0.5, {cluster_id=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by ()", + "format": "time_series", + "legendFormat": "p50", + "legendLink": null + } + ], + "title": "Fetched series", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "id": 3, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "quantile_over_time(0.99, {cluster_id=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by ()", + "format": "time_series", + "legendFormat": "p99", + "legendLink": null + }, + { + "expr": "quantile_over_time(0.5, {cluster_id=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by ()", + "format": "time_series", + "legendFormat": "p50", + 
"legendLink": null + } + ], + "title": "Fetched chunks", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "id": 4, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "quantile_over_time(0.99, {cluster_id=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by ()", + "format": "time_series", + "legendFormat": "p99", + "legendLink": null + }, + { + "expr": "quantile_over_time(0.5, {cluster_id=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by ()", + "format": "time_series", + "legendFormat": "p50", + "legendLink": null + } + ], + "title": "Response size", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "s" + }, + "overrides": [] + }, + "id": 5, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } 
+ }, + "span": 2, + "targets": [ + { + "expr": "quantile_over_time(0.99, {cluster_id=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by ()", + "format": "time_series", + "legendFormat": "p99", + "legendLink": null + }, + { + "expr": "quantile_over_time(0.5, {cluster_id=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by ()", + "format": "time_series", + "legendFormat": "p50", + "legendLink": null + } + ], + "title": "Time span", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + "description": "### Query wall time\nSeconds per second spent by queriers evaluating queries.\nThis is roughly the product of the number of subqueries for a query and how long they took.\nAn increase in this metric means that queries take more resources from the query path to evaluate.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "s" + }, + "overrides": [] + }, + "id": 6, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "quantile_over_time(0.99, {cluster_id=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap
query_wall_time_seconds [$__auto]) by ()", + "format": "time_series", + "legendFormat": "p99", + "legendLink": null + }, + { + "expr": "quantile_over_time(0.5, {cluster_id=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap query_wall_time_seconds [$__auto]) by ()", + "format": "time_series", + "legendFormat": "p50", + "legendLink": null + } + ], + "title": "Query wall time", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Across tenants", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "s" + }, + "overrides": [] + }, + "id": 7, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster_id=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by (user))", + "format": "time_series", + "legendFormat": "{{user}}", + "legendLink": null + } + ], + "title": "P99 response time", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + 
"showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 8, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster_id=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by (user))", + "format": "time_series", + "legendFormat": "{{user}}", + "legendLink": null + } + ], + "title": "P99 fetched series", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "id": 9, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster_id=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by (user))", + "format": "time_series", + "legendFormat": "{{user}}", + "legendLink": null + } + ], + "title": "P99 fetched chunks", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": 
"line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "id": 10, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster_id=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by (user))", + "format": "time_series", + "legendFormat": "{{user}}", + "legendLink": null + } + ], + "title": "P99 response size", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "s" + }, + "overrides": [] + }, + "id": 11, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster_id=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by (user))", + "format": "time_series", + "legendFormat": "{{user}}", + "legendLink": null + } + ], + "title": "P99 time span", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + 
"description": "### Query wall time\nSeconds per second spent by queriers evaluating queries.\nThis is roughly the product of the number of subqueries for a query and how long they took.\nAn increase in this metric means that queries take more resources from the query path to evaluate.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "s" + }, + "overrides": [] + }, + "id": 12, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster_id=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap query_wall_time_seconds [$__auto]) by (user))", + "format": "time_series", + "legendFormat": "{{user}}", + "legendLink": null + } + ], + "title": "P99 query wall time", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Top 10 tenants", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "s" + }, + "overrides": [] + }, + "id": 13, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + 
"sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster_id=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(response_time) [$__auto]) by (user_agent))", + "format": "time_series", + "legendFormat": "{{user_agent}}", + "legendLink": null + } + ], + "title": "P99 response time", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 14, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster_id=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_series_count[$__auto]) by (user_agent))", + "format": "time_series", + "legendFormat": "{{user_agent}}", + "legendLink": null + } + ], + "title": "P99 fetched series", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "id": 15, + 
"links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster_id=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap fetched_chunk_bytes[$__auto]) by (user_agent))", + "format": "time_series", + "legendFormat": "{{user_agent}}", + "legendLink": null + } + ], + "title": "P99 fetched chunks", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "id": 16, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster_id=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap response_size_bytes[$__auto]) by (user_agent))", + "format": "time_series", + "legendFormat": "{{user_agent}}", + "legendLink": null + } + ], + "title": "P99 response size", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": 
"absolute", + "steps": [] + }, + "unit": "s" + }, + "overrides": [] + }, + "id": 17, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster_id=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap duration_seconds(length) [$__auto]) by (user_agent))", + "format": "time_series", + "legendFormat": "{{user_agent}}", + "legendLink": null + } + ], + "title": "P99 time span", + "type": "timeseries" + }, + { + "datasource": "${loki_datasource}", + "description": "### Query wall time\nSeconds per second spent by queriers evaluating queries.\nThis is roughly the product of the number of subqueries for a query and how long they took.\nAn increase in this metric means that queries take more resources from the query path to evaluate.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "s" + }, + "overrides": [] + }, + "id": 18, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 2, + "targets": [ + { + "expr": "topk(10, quantile_over_time(0.99, {cluster_id=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | unwrap query_wall_time_seconds [$__auto]) by (user_agent))", + "format": "time_series", + "legendFormat": "{{user_agent}}", + "legendLink": null + } + 
], + "title": "P99 query wall time", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Top 10 User-Agents", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "${loki_datasource}", + "fieldConfig": { + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "fetched_chunk_bytes" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "fetched_index_bytes" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "response_size_bytes" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "results_cache_hit_bytes" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "results_cache_miss_bytes" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "estimated_series_count" + }, + "properties": [ + { + "id": "unit", + "value": "short" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "fetched_chunks_count" + }, + "properties": [ + { + "id": "unit", + "value": "short" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "fetched_series_count" + }, + "properties": [ + { + "id": "unit", + "value": "short" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Time span" + }, + "properties": [ + { + "id": "unit", + "value": "s" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Duration" + }, + "properties": [ + { + "id": "unit", + "value": "s" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Step" + }, + "properties": [ + { + "id": "unit", + "value": "s" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": 
"queue_time_seconds" + }, + "properties": [ + { + "id": "unit", + "value": "s" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "query_wall_time_seconds" + }, + "properties": [ + { + "id": "unit", + "value": "s" + } + ] + } + ] + }, + "height": "500px", + "id": 19, + "span": 12, + "targets": [ + { + "expr": "{cluster_id=~\"$cluster\",namespace=~\"$namespace\",name=~\"query-frontend.*\"} |= \"query stats\" != \"/api/v1/read\" | logfmt | user=~\"${tenant_id}\" | user_agent=~\"${user_agent}\" | response_time > ${min_duration} | label_format response_time_seconds=\"{{ if .response_time }} {{ duration .response_time }} {{ end }}\",param_step_seconds=\"{{ if .param_step }} {{ div .param_step 1000 }} {{ end }}\",length_seconds=\"{{ if .length }} {{ duration .length }} {{ end }}\"", + "instant": false, + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "Slow queries", + "transformations": [ + { + "id": "extractFields", + "options": { + "source": "labels" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Line": true, + "Time": true, + "caller": true, + "cluster": true, + "component": true, + "container": true, + "gossip_ring_member": true, + "host": true, + "id": true, + "job": true, + "labels": true, + "length": true, + "level": true, + "line": true, + "method": true, + "msg": true, + "name": true, + "namespace": true, + "param_step": true, + "path": true, + "pod": true, + "pod_template_hash": true, + "response_time": true, + "stream": true, + "traceID": true, + "tsNs": true + }, + "indexByName": { + "err": 10, + "length_seconds": 3, + "param_end": 5, + "param_query": 8, + "param_start": 4, + "param_step_seconds": 7, + "param_time": 6, + "response_time_seconds": 9, + "status": 1, + "ts": 0, + "user": 2 + }, + "renameByName": { + "err": "Error", + "length_seconds": "Time span", + "param_end": "End", + "param_query": "Query", + "param_start": "Start", + "param_step_seconds": "Step", + "param_time": "Time (instant 
query)", + "response_time_seconds": "Duration", + "ts": "Completion date", + "user": "Tenant ID" + } + } + }, + { + "id": "convertFieldType", + "options": { + "conversions": [ + { + "destinationType": "number", + "targetField": "sharded_queries" + }, + { + "destinationType": "number", + "targetField": "split_queries" + }, + { + "destinationType": "number", + "targetField": "fetched_chunk_bytes" + }, + { + "destinationType": "number", + "targetField": "fetched_index_bytes" + }, + { + "destinationType": "number", + "targetField": "response_size_bytes" + }, + { + "destinationType": "number", + "targetField": "results_cache_hit_bytes" + }, + { + "destinationType": "number", + "targetField": "results_cache_miss_bytes" + }, + { + "destinationType": "number", + "targetField": "estimated_series_count" + }, + { + "destinationType": "number", + "targetField": "fetched_chunks_count" + }, + { + "destinationType": "number", + "targetField": "fetched_series_count" + }, + { + "destinationType": "number", + "targetField": "Time span" + }, + { + "destinationType": "number", + "targetField": "Duration" + }, + { + "destinationType": "number", + "targetField": "Step" + }, + { + "destinationType": "number", + "targetField": "queue_time_seconds" + }, + { + "destinationType": "number", + "targetField": "query_wall_time_seconds" + } + ] + } + } + ], + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "owner:team-atlas", + "topic:observability", + "component:mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".*", + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": 
"$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [], + "query": "label_values(cortex_build_info, cluster_id)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [], + "query": "label_values(cortex_build_info{cluster_id=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "hide": 0, + "includeAll": false, + "label": "Loki data source", + "multi": false, + "name": "loki_datasource", + "query": "loki", + "type": "datasource" + }, + { + "current": { + "selected": true, + "text": "5s", + "value": "5s" + }, + "hide": 0, + "label": "Min duration", + "name": "min_duration", + "options": [ + { + "selected": true, + "text": "5s", + "value": "5s" + } + ], + "query": "5s", + "type": "textbox" + }, + { + "current": { + "selected": true, + "text": ".*", + "value": ".*" + }, + "hide": 0, + "label": "Tenant ID", + "name": "tenant_id", + "options": [ + { + "selected": true, + "text": ".*", + "value": ".*" + } + ], + "query": ".*", + "type": "textbox" + }, + { + "current": { + "selected": true, + "text": ".*", + "value": ".*" + }, + "hide": 0, + "label": "User-Agent HTTP Header", + "name": "user_agent", + "options": [ + { + "selected": true, + "text": ".*", + "value": ".*" + } + ], + "query": ".*", + "type": "textbox" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + 
"24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Slow queries", + "uid": "mimir-6089e1ce1e678788f46312a0a1e647e6", + "version": 0 +} diff --git a/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-tenants.json b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-tenants.json new file mode 100644 index 00000000..4b0fe00b --- /dev/null +++ b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-tenants.json @@ -0,0 +1,2665 @@ +{ + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "owner:team-atlas", + "topic:observability", + "component:mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "25px", + "panels": [ + { + "content": "

\n This dashboard shows various metrics detailed by tenant (user) selected above.\n

\n", + "datasource": null, + "description": "", + "id": 1, + "mode": "markdown", + "span": 12, + "title": "", + "transparent": true, + "type": "text" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Tenants dashboard description", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### All series\nNumber of active, in-memory, and owned series per user, and active series matching custom trackers (in parenthesis).\nNote that these counts include all series regardless of the type of data (counter, gauge, native histogram, etc.).\nNote that active series matching custom trackers are included in the total active series count.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 2, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum(\n (\n cortex_ingester_memory_series_created_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n - cortex_ingester_memory_series_removed_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n )\n / on(cluster_id, namespace) group_left\n max by (cluster_id, namespace) (cortex_distributor_replication_factor{cluster_id=~\"$cluster\", 
job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", + "format": "time_series", + "legendFormat": "in-memory", + "legendLink": null + }, + { + "expr": "max(cortex_limits_overrides{cluster_id=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"max_global_series_per_user\", user=\"$user\"})\nor\nmax(cortex_limits_defaults{cluster_id=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"max_global_series_per_user\"})\n", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + }, + { + "expr": "sum(\n cortex_ingester_active_series{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster_id, namespace) group_left\n max by (cluster_id, namespace) (cortex_distributor_replication_factor{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", + "format": "time_series", + "legendFormat": "active", + "legendLink": null + }, + { + "expr": "sum(\n cortex_ingester_owned_series{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster_id, namespace) group_left\n max by (cluster_id, namespace) (cortex_distributor_replication_factor{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", + "format": "time_series", + "legendFormat": "owned", + "legendLink": null + }, + { + "expr": "sum by (name) (\n cortex_ingester_active_series_custom_tracker{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster_id, namespace) group_left\n max by (cluster_id, namespace) (cortex_distributor_replication_factor{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n) > 0\n", + "format": "time_series", + "legendFormat": "active ({{ name }})", + "legendLink": null 
+ } + ], + "title": "All series", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### In-memory series per ingester\nLocal tenant series limit and number of in-memory series per ingester.\nBecause series can be unevenly distributed across ingesters, ingesters may hit the local limit at different times.\nNote that in-memory series may exceed the local limit if limiting based on owned series is enabled.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/local limit .+/" + }, + "properties": [ + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 3, + "links": [], + "options": { + "legend": { + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "min by (job) (cortex_ingester_local_limits{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", limit=\"max_global_series_per_user\", user=\"$user\"})\n", + "format": "time_series", + "legendFormat": "local limit ({{job}})", + "legendLink": null + }, + { + "expr": "cortex_ingester_memory_series_created_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n- cortex_ingester_memory_series_removed_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "In-memory series per ingester", + "type": 
"timeseries" + }, + { + "datasource": "$datasource", + "description": "### Owned series per ingester\nLocal tenant series limit and number of owned series per ingester.\nBecause series can be unevenly distributed across ingesters, ingesters may hit the local limit at different times.\nOwned series are the subset of an ingester's in-memory series that currently map to it in the ring\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 0, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/local limit .+/" + }, + "properties": [ + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 4, + "links": [], + "options": { + "legend": { + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "span": 4, + "targets": [ + { + "expr": "min by (job) (cortex_ingester_local_limits{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", limit=\"max_global_series_per_user\", user=\"$user\"})\n", + "format": "time_series", + "legendFormat": "local limit ({{job}})", + "legendLink": null + }, + { + "expr": "cortex_ingester_owned_series{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Owned series per ingester", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Tenant series counts", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ 
+ { + "datasource": "$datasource", + "description": "### Series with exemplars\nNumber of series with exemplars currently in storage.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 5, + "links": [], + "options": { + "legend": { + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(\n cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster_id, namespace) group_left\n max by (cluster_id, namespace) (cortex_distributor_replication_factor{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", + "format": "time_series", + "legendFormat": "series", + "legendLink": null + } + ], + "title": "Series with exemplars", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Oldest exemplar age\nThe age of the oldest exemplar stored in circular storage.\nUseful to check for what time range the current exemplar buffer limit allows.\nThis usually means the max age for all exemplars for a typical setup.\nThis is not true, though, if one of the series' timestamps is in the future compared to the rest of the series.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "s" + }, + "overrides": [] + }, + "id": 6, + "links": [], + "options": { + 
"legend": { + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "time() - min(cortex_ingester_tsdb_exemplar_last_exemplars_timestamp_seconds{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"} > 0)", + "format": "time_series", + "legendFormat": "age", + "legendLink": null + } + ], + "title": "Oldest exemplar age", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Native histogram series\nNumber of active native histogram series per user, and active native histogram series matching custom trackers (in parenthesis).\nNote that active series matching custom trackers are included in the total active series count.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 7, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(\n cortex_ingester_active_native_histogram_series{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster_id, namespace) group_left\n max by (cluster_id, namespace) (cortex_distributor_replication_factor{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", + "format": "time_series", + "legendFormat": "active", + "legendLink": null + }, + 
{ + "expr": "sum by (name) (\n cortex_ingester_active_native_histogram_series_custom_tracker{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster_id, namespace) group_left\n max by (cluster_id, namespace) (cortex_distributor_replication_factor{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n) > 0\n", + "format": "time_series", + "legendFormat": "active ({{ name }})", + "legendLink": null + } + ], + "title": "Native histogram series", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Total number of buckets used by native histogram series\nTotal number of buckets in active native histogram series per user, and total active native histogram buckets matching custom trackers (in parenthesis).\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 8, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(\n cortex_ingester_active_native_histogram_buckets{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster_id, namespace) group_left\n max by (cluster_id, namespace) (cortex_distributor_replication_factor{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", + "format": 
"time_series", + "legendFormat": "buckets", + "legendLink": null + }, + { + "expr": "sum by (name) (\n cortex_ingester_active_native_histogram_buckets_custom_tracker{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}\n / on(cluster_id, namespace) group_left\n max by (cluster_id, namespace) (cortex_distributor_replication_factor{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n) > 0\n", + "format": "time_series", + "legendFormat": "buckets ({{ name }})", + "legendLink": null + } + ], + "title": "Total number of buckets used by native histogram series", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Exemplars and native histograms", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Distributor requests incoming rate\nThe rate of requests that have come in to the distributor, including rejected requests.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 9, + "links": [], + "options": { + "legend": { + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(rate(cortex_distributor_requests_in_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "rate", + "legendLink": null + } + ], + "title": "Distributor requests incoming rate", + "type": "timeseries" + }, + { + "datasource": 
"$datasource", + "description": "### Distributor requests received (accepted) rate\nThe rate of received requests, excluding rejected requests.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 10, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(rate(cortex_distributor_received_requests_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "rate", + "legendLink": null + }, + { + "expr": "max(cortex_limits_overrides{cluster_id=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"request_rate\", user=\"$user\"})\nor\nmax(cortex_limits_defaults{cluster_id=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"request_rate\"})\n", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "Distributor requests received (accepted) rate", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Newest seen sample age\nThe age of the newest received sample seen in the distributors.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + 
"stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "s" + }, + "overrides": [] + }, + "id": 11, + "links": [], + "options": { + "legend": { + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "time() - max(cortex_distributor_latest_seen_sample_timestamp_seconds{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"} > 0)", + "format": "time_series", + "legendFormat": "age", + "legendLink": null + } + ], + "title": "Newest seen sample age", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Distributor discarded requests rate\nThe rate of each request's discarding reason.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 12, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by (reason) (rate(cortex_discarded_requests_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{ reason }}", + "legendLink": null + } + ], + "title": "Distributor discarded requests rate", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Distributor ingestion requests", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + 
"description": "### Distributor samples incoming rate\nThe rate of samples that have come in to the distributor, including rejected or deduped samples.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 13, + "links": [], + "options": { + "legend": { + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(rate(cortex_distributor_samples_in_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "rate", + "legendLink": null + } + ], + "title": "Distributor samples incoming rate", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Distributor samples received (accepted) rate\nThe rate of received samples, excluding rejected and deduped samples.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 14, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": 
"sum(rate(cortex_distributor_received_samples_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "rate", + "legendLink": null + }, + { + "expr": "max(cortex_limits_overrides{cluster_id=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"ingestion_rate\", user=\"$user\"})\nor\nmax(cortex_limits_defaults{cluster_id=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"ingestion_rate\"})\n", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "Distributor samples received (accepted) rate", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Distributor deduplicated/non-HA\nThe rate of deduplicated samples and the rate of received samples for a user that has HA tracking turned on, but the sample didn't contain both HA labels.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 15, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(rate(cortex_distributor_deduped_samples_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "deduplicated", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_distributor_non_ha_samples_received_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", 
user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "non-HA", + "legendLink": null + } + ], + "title": "Distributor deduplicated/non-HA", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Distributor and ingester discarded samples rate\nThe rate of each sample's discarding reason.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 16, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by (reason) (rate(cortex_discarded_samples_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{ reason }} (distributor)", + "legendLink": null + }, + { + "expr": "sum by (reason) (rate(cortex_discarded_samples_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{ reason }} (ingester)", + "legendLink": null + } + ], + "title": "Distributor and ingester discarded samples rate", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Samples ingestion funnel", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Distributor exemplars incoming rate\nThe rate of exemplars that have come in to the distributor, including rejected or deduped exemplars.\n\n", + "fieldConfig": 
{ + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 17, + "links": [], + "options": { + "legend": { + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(rate(cortex_distributor_exemplars_in_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "rate", + "legendLink": null + } + ], + "title": "Distributor exemplars incoming rate", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Distributor exemplars received (accepted) rate\nThe rate of received exemplars, excluding rejected and deduped exemplars.\nThis number can be significantly lower than the incoming rate because we dedupe the HA sent exemplars, and then reject based on time.\nSee discarded rate for reasons why exemplars are being discarded.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 18, + "links": [], + "options": { + "legend": { + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(rate(cortex_distributor_received_exemplars_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + 
"legendFormat": "rate", + "legendLink": null + } + ], + "title": "Distributor exemplars received (accepted) rate", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Distributor discarded exemplars rate\nThe rate of each exemplar's discarding reason.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 19, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by (reason) (rate(cortex_discarded_exemplars_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{ reason }}", + "legendLink": null + } + ], + "title": "Distributor discarded exemplars rate", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Ingester appended exemplars rate\nTotal number of exemplars appended in the ingesters.\nThis can be lower than ingested exemplars rate since TSDB does not append the same exemplar twice, and those can be frequent.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 20, + "links": [], + "options": { + "legend": { + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": 
"sum(\n rate(cortex_ingester_tsdb_exemplar_exemplars_appended_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"}[$__rate_interval])\n / on(cluster_id, namespace) group_left\n max by (cluster_id, namespace) (cortex_distributor_replication_factor{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", + "format": "time_series", + "legendFormat": "rate", + "legendLink": null + } + ], + "title": "Ingester appended exemplars rate", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Exemplars ingestion funnel", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Symbol table size for loaded blocks\nSize of symbol table in memory for loaded blocks, averaged by ingester.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "id": 21, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by (job) (cortex_ingester_tsdb_symbol_table_size_bytes{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"})", + "format": "time_series", + "legendFormat": "{{ job }}", + "legendLink": null + } + ], + "title": "Symbol table size for loaded blocks", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Space used by local blocks\nThe number of bytes that are currently used for local storage by all blocks.\n\n", + 
"fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "id": 22, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by (job) (cortex_ingester_tsdb_storage_blocks_bytes{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", user=\"$user\"})", + "format": "time_series", + "legendFormat": "{{ job }}", + "legendLink": null + } + ], + "title": "Space used by local blocks", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Ingesters' storage", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Number of groups\nTotal number of rule groups for a tenant.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 23, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "count(sum by (rule_group) 
(cortex_prometheus_rule_group_rules{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}))", + "format": "time_series", + "legendFormat": "groups", + "legendLink": null + }, + { + "expr": "max(cortex_limits_overrides{cluster_id=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"ruler_max_rule_groups_per_tenant\", user=\"$user\"})\nor\nmax(cortex_limits_defaults{cluster_id=~\"$cluster\", job=~\"($namespace)/((overrides-exporter|mimir-backend.*))\", limit_name=\"ruler_max_rule_groups_per_tenant\"})\n", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "Number of groups", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Number of rules\nTotal number of rules for a tenant.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 24, + "links": [], + "options": { + "legend": { + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(cortex_prometheus_rule_group_rules{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"})", + "format": "time_series", + "legendFormat": "rules", + "legendLink": null + } + ], + "title": "Number of rules", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", 
+ "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 25, + "links": [], + "options": { + "legend": { + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum(rate(cortex_prometheus_rule_evaluations_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "rate", + "legendLink": null + } + ], + "title": "Total evaluations rate", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 26, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by (rule_group) (rate(cortex_prometheus_rule_evaluation_failures_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) > 0", + "format": "time_series", + "legendFormat": "{{ rule_group }}", + "legendLink": null + } + ], + "title": "Failed evaluations rate", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Rules", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 27, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": 
true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "sort": { + "col": 2, + "desc": true + }, + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "rules", + "colorMode": null, + "colors": [], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "topk($limit, sum by (rule_group) (cortex_prometheus_rule_group_rules{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}))", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Top $limit biggest groups", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 28, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + 
"show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "sort": { + "col": 2, + "desc": true + }, + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "seconds", + "colorMode": null, + "colors": [], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value #A", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "colorMode": null, + "colors": [], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "topk($limit, sum by (rule_group) (cortex_prometheus_rule_group_last_duration_seconds{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}))", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Top $limit slowest groups (last evaluation)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Top rules", + 
"titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 29, + "links": [], + "options": { + "legend": { + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum(rate(cortex_prometheus_notifications_sent_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "rate", + "legendLink": null + } + ], + "title": "Sent notifications rate", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "rate" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 30, + "links": [], + "options": { + "legend": { + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum(rate(cortex_prometheus_notifications_errors_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "rate", + "legendLink": null + } + ], + 
"title": "Failed notifications rate", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Notifications", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 31, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by (user) (cortex_alertmanager_alerts{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"})", + "format": "time_series", + "legendFormat": "alerts", + "legendLink": null + }, + { + "expr": "sum by (user) (cortex_alertmanager_silences{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"})", + "format": "time_series", + "legendFormat": "silences", + "legendLink": null + } + ], + "title": "Alerts", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": 
"successful" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 32, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "(\nsum(rate(cortex_alertmanager_notifications_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))\n-\non() (sum(rate(cortex_alertmanager_notifications_failed_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) or on () vector(0))\n) > 0\n", + "format": "time_series", + "legendFormat": "successful", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_alertmanager_notifications_failed_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "failed", + "legendLink": null + } + ], + "title": "NPS", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 33, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "(\nsum(rate(cortex_alertmanager_notifications_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) 
by(integration)\n-\n(sum(rate(cortex_alertmanager_notifications_failed_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) by(integration) or\n (sum(rate(cortex_alertmanager_notifications_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) by(integration) * 0)\n)) > 0\n", + "format": "time_series", + "legendFormat": "success - {{ integration }}", + "legendLink": null + }, + { + "expr": "sum(rate(cortex_alertmanager_notifications_failed_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((alertmanager|cortex|mimir|mimir-backend.*))\", user=\"$user\"}[$__rate_interval])) by(integration)", + "format": "time_series", + "legendFormat": "failed - {{ integration }}", + "legendLink": null + } + ], + "title": "NPS by integration", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Alertmanager", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 34, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum(rate(cortex_query_frontend_queries_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-frontend.*|cortex|mimir|mimir-read.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Queries / Sec", + "legendLink": null + } + ], + "title": "Rate of 
Read Requests - query-frontend", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 35, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum(cortex_query_scheduler_queue_length{cluster_id=~\"$cluster\", job=~\"($namespace)/((query-scheduler.*|mimir-backend.*))\", user=\"$user\"})", + "format": "time_series", + "legendFormat": "Queue Length", + "legendLink": null + } + ], + "title": "Number of Queries Queued - query-scheduler", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Read Path - Queries (User)", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 36, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum(rate(cortex_query_frontend_queries_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler-query-frontend.*))\", user=\"$user\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Queries / Sec", + "legendLink": null + } + ], + 
"title": "Rate of Read Requests - ruler-query-frontend", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 37, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum(cortex_query_scheduler_queue_length{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler-query-scheduler.*))\", user=\"$user\"})", + "format": "time_series", + "legendFormat": "Queue Length", + "legendLink": null + } + ], + "title": "Number of Queries Queued - ruler-query-scheduler", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Read Path - Queries (Ruler)", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Estimated Compaction Jobs\nEstimated number of compaction jobs for selected user, based on latest version of bucket index. When user sends data, ingesters upload new user blocks every 2 hours\n(shortly after 01:00 UTC, 03:00 UTC, 05:00 UTC, etc.), and compactors should process all of the blocks within 2h interval.\nIf this graph regularly goes to zero (or close to zero) in 2 hour intervals, then compaction for this user works correctly.\n\nDepending on the configuration, there are two types of jobs: `split` jobs and `merge` jobs. 
Split jobs will only show up when user is configured with positive number of `compactor_split_and_merge_shards`.\nValues for split and merge jobs are stacked.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 50, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 38, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by (type) (cortex_bucket_index_estimated_compaction_jobs{cluster_id=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", user=\"$user\"})\nand ignoring(type)\n(sum(rate(cortex_bucket_index_estimated_compaction_jobs_errors_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) == 0)\n", + "format": "time_series", + "legendFormat": "{{ job }}", + "legendLink": null + } + ], + "title": "Estimated Compaction Jobs", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Number of blocks\nNumber of blocks stored in long-term storage for this user.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 39, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "max by (user) (cortex_bucket_blocks_count{cluster_id=~\"$cluster\", 
job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\", user=\"$user\"})\n", + "format": "time_series", + "legendFormat": "{{ job }}", + "legendLink": null + } + ], + "title": "Blocks", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Compactions", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "owner:team-atlas", + "topic:observability", + "component:mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [], + "query": "label_values(cortex_build_info, cluster_id)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "namespace", + "multi": true, + "name": "namespace", + "options": [], + "query": "label_values(cortex_build_info{cluster_id=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "user", + "multi": false, + "name": "user", + "options": [], + "query": "label_values(cortex_ingester_active_series{cluster_id=~\"$cluster\", 
namespace=~\"$namespace\"}, user)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": true, + "text": "10", + "value": "10" + }, + "hide": 0, + "includeAll": false, + "multi": false, + "name": "limit", + "options": [ + { + "selected": true, + "text": "10", + "value": "10" + }, + { + "selected": false, + "text": "50", + "value": "50" + }, + { + "selected": false, + "text": "100", + "value": "100" + }, + { + "selected": false, + "text": "500", + "value": "500" + }, + { + "selected": false, + "text": "1000", + "value": "1000" + } + ], + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Tenants", + "uid": "mimir-35fa247ce651ba189debf33d7ae41611", + "version": 0 +} diff --git a/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-top-tenants.json b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-top-tenants.json new file mode 100644 index 00000000..08029d12 --- /dev/null +++ b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-top-tenants.json @@ -0,0 +1,1643 @@ +{ + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.0.0" + } + ], + "annotations": { + "list": [] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "owner:team-atlas", + "topic:observability", + "component:mimir" + ], + "targetBlank": false, + "title": "Mimir dashboards", + "type": "dashboards" + } + 
], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "25px", + "panels": [ + { + "content": "

\n This dashboard shows the top tenants based on multiple selection criteria.\n Rows are collapsed by default to avoid querying all of them.\n Use the templating variable \"limit\" above to select the number of users to be shown.\n

\n", + "datasource": null, + "description": "", + "id": 1, + "mode": "markdown", + "span": 12, + "title": "", + "transparent": true, + "type": "text" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Top tenants dashboard description", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "sort": { + "col": 2, + "desc": true + }, + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "series", + "colorMode": null, + "colors": [], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "user", + "colorMode": null, + "colors": [], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "user", + "thresholds": [], + "type": "number", + "unit": "string" + }, + { + "alias": "", + "colorMode": null, + "colors": [], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "topk($limit,\n sum by (user) (\n 
cortex_ingester_active_series{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster_id, namespace) group_left\n max by (cluster_id, namespace) (cortex_distributor_replication_factor{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n )\n)\n", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Top $limit users by active series", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "By active series", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "sort": { + "col": 2, + "desc": true + }, + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "series", + "colorMode": null, + "colors": [], + "dateFormat": 
"YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "user", + "colorMode": null, + "colors": [], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "user", + "thresholds": [], + "type": "number", + "unit": "string" + }, + { + "alias": "", + "colorMode": null, + "colors": [], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "topk($limit, sum by (user) (\n (\n sum by (user, cluster_id, namespace) (cortex_ingester_memory_series_created_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n -\n sum by (user, cluster_id, namespace) (cortex_ingester_memory_series_removed_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n )\n / on(cluster_id, namespace) group_left\n max by (cluster_id, namespace) (cortex_distributor_replication_factor{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"} )\n)\n)", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Top $limit users by in-memory series (series created - series removed)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": 
null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "By in-memory series", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 4, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 12, + "targets": [ + { + "expr": "sum by (user) (\n (\n sum by (user, cluster_id, namespace) (cortex_ingester_memory_series_created_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n -\n sum by (user, cluster_id, namespace) (cortex_ingester_memory_series_removed_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} )\n )\n / on(cluster_id, namespace) group_left\n max by (cluster_id, namespace) (cortex_distributor_replication_factor{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"} )\n)\n\nand\ntopk($limit, sum by (user) (\n (\n sum by (user, cluster_id, namespace) (cortex_ingester_memory_series_created_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ end())\n -\n sum by (user, cluster_id, namespace) (cortex_ingester_memory_series_removed_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ end())\n )\n / on(cluster_id, namespace) group_left\n max by (cluster_id, namespace) (cortex_distributor_replication_factor{cluster_id=~\"$cluster\", 
job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"} @ end())\n)\n - sum by (user) (\n (\n sum by (user, cluster_id, namespace) (cortex_ingester_memory_series_created_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ start())\n -\n sum by (user, cluster_id, namespace) (cortex_ingester_memory_series_removed_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"} @ start())\n )\n / on(cluster_id, namespace) group_left\n max by (cluster_id, namespace) (cortex_distributor_replication_factor{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"} @ start())\n)\n)\n", + "format": "time_series", + "legendFormat": "{{ user }}", + "legendLink": null + } + ], + "title": "Top $limit users by in-memory series (series created - series removed) that grew the most between query range start and query range end", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "By in-memory series growth", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "sort": { + "col": 2, + "desc": true + }, + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "samples/s", + "colorMode": null, + "colors": [], + "dateFormat": 
"YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "user", + "colorMode": null, + "colors": [], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "user", + "thresholds": [], + "type": "number", + "unit": "string" + }, + { + "alias": "", + "colorMode": null, + "colors": [], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "topk($limit, sum by (user) (rate(cortex_distributor_received_samples_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[5m])))", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Top $limit users by received samples rate in last 5m", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "By samples rate", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + 
"spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 6, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 12, + "targets": [ + { + "expr": "sum by (user) (rate(cortex_distributor_received_samples_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))\nand\ntopk($limit,\n sum by (user) (rate(cortex_distributor_received_samples_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ end()))\n -\n sum by (user) (rate(cortex_distributor_received_samples_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ start()))\n)\n", + "format": "time_series", + "legendFormat": "{{ user }}", + "legendLink": null + } + ], + "title": "Top $limit users by received samples rate that grew the most between query range start and query range end", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "By samples rate growth", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "sort": { + "col": 2, + "desc": true + }, + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": 
false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "samples/s", + "colorMode": null, + "colors": [], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "user", + "colorMode": null, + "colors": [], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "user", + "thresholds": [], + "type": "number", + "unit": "string" + }, + { + "alias": "", + "colorMode": null, + "colors": [], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "topk($limit, sum by (user) (rate(cortex_discarded_samples_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor.*|cortex|mimir|mimir-write.*))\"}[5m])))", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Top $limit users by discarded samples rate in last 5m", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "By discarded samples rate", + "titleSize": "h6" + }, + { + 
"collapse": true, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 8, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 12, + "targets": [ + { + "expr": "sum by (user) (rate(cortex_discarded_samples_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))\nand\ntopk($limit,\n sum by (user) (rate(cortex_discarded_samples_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ end()))\n -\n sum by (user) (rate(cortex_discarded_samples_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*|distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval] @ start()))\n)\n", + "format": "time_series", + "legendFormat": "{{ user }}", + "legendLink": null + } + ], + "title": "Top $limit users by discarded samples rate that grew the most between query range start and query range end", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "By discarded samples rate growth", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, 
+ "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "sort": { + "col": 2, + "desc": true + }, + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "series", + "colorMode": null, + "colors": [], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "user", + "colorMode": null, + "colors": [], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "user", + "thresholds": [], + "type": "number", + "unit": "string" + }, + { + "alias": "", + "colorMode": null, + "colors": [], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "topk($limit,\n sum by (user) (\n cortex_ingester_tsdb_exemplar_series_with_exemplars_in_storage{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster_id, namespace) group_left\n max by (cluster_id, namespace) (cortex_distributor_replication_factor{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n )\n)\n", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Top $limit users by series with exemplars", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + 
"type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "By series with exemplars", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "sort": { + "col": 2, + "desc": true + }, + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "exemplars/s", + "colorMode": null, + "colors": [], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "user", + "colorMode": null, + "colors": [], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "user", + "thresholds": [], + "type": "number", + "unit": "string" + }, + { + "alias": "", + "colorMode": null, + "colors": [], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + 
"pattern": "/.*/", + "thresholds": [], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "topk($limit, sum by (user) (rate(cortex_distributor_received_exemplars_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[5m])))", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Top $limit users by received exemplars rate in last 5m", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "By exemplars rate", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "sort": { + "col": 3, + "desc": true + }, + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "rules", + "colorMode": null, + "colors": [], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + 
"decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "user", + "colorMode": null, + "colors": [], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "user", + "thresholds": [], + "type": "number", + "unit": "string" + }, + { + "alias": "", + "colorMode": null, + "colors": [], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "topk($limit, sum by (rule_group, user) (cortex_prometheus_rule_group_rules{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}))", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Top $limit biggest groups", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "By rule group size", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + 
"values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "sort": { + "col": 3, + "desc": true + }, + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "seconds", + "colorMode": null, + "colors": [], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "user", + "colorMode": null, + "colors": [], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "user", + "thresholds": [], + "type": "number", + "unit": "string" + }, + { + "alias": "", + "colorMode": null, + "colors": [], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "topk($limit, sum by (rule_group, user) (cortex_prometheus_rule_group_last_duration_seconds{cluster_id=~\"$cluster\", job=~\"($namespace)/((ruler|cortex|mimir|mimir-backend.*))\"}))", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Top $limit slowest groups (last evaluation)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": 
true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "By rule group evaluation time", + "titleSize": "h6" + }, + { + "collapse": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "sort": { + "col": 2, + "desc": true + }, + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "hidden" + }, + { + "alias": "Compaction Jobs", + "colorMode": null, + "colors": [], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "Value", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "user", + "colorMode": null, + "colors": [], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "link": false, + "linkTargetBlank": false, + "linkTooltip": "Drill down", + "linkUrl": "", + "pattern": "user", + "thresholds": [], + "type": "number", + "unit": "string" + }, + { + "alias": "", + "colorMode": null, + "colors": [], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "string", + "unit": "short" + } + ], + "targets": [ + { + "expr": "topk($limit,\n sum by (user) (cortex_bucket_index_estimated_compaction_jobs{cluster_id=~\"$cluster\", 
job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"})\n and ignoring(user)\n (sum(rate(cortex_bucket_index_estimated_compaction_jobs_errors_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) == 0)\n)\n", + "format": "table", + "instant": true, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Top $limit users by estimated compaction jobs from bucket-index", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "transform": "table", + "type": "table", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "By estimated compaction jobs from bucket-index", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "owner:team-atlas", + "topic:observability", + "component:mimir" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [], + "query": "label_values(cortex_build_info, cluster_id)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + 
"allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": "$__all" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "namespace", + "multi": true, + "name": "namespace", + "options": [], + "query": "label_values(cortex_build_info{cluster_id=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": true, + "text": "10", + "value": "10" + }, + "hide": 0, + "includeAll": false, + "multi": false, + "name": "limit", + "options": [ + { + "selected": true, + "text": "10", + "value": "10" + }, + { + "selected": false, + "text": "50", + "value": "50" + }, + { + "selected": false, + "text": "100", + "value": "100" + } + ], + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Mimir / Top tenants", + "uid": "mimir-bc6e12d4fe540e4a1785b9d3ca0ffdd9", + "version": 0 +} diff --git a/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-writes-networking.json b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-writes-networking.json index 766e4c64..7826341a 100644 --- a/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-writes-networking.json +++ b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-writes-networking.json @@ -21,7 +21,9 @@ "includeVars": true, "keepTime": true, "tags": [ - "mimir" + "owner:team-atlas", + "topic:observability", + "component:mimir" ], "targetBlank": false, "title": "Mimir dashboards", @@ -73,7 +75,7 @@ "span": 3, "targets": [ { - "expr": "sum by(pod) 
(rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -121,7 +123,7 @@ "span": 3, "targets": [ { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -172,13 +174,13 @@ "span": 3, "targets": [ { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", + "expr": "avg(cortex_inflight_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", "format": "time_series", "legendFormat": "avg", "legendLink": null }, { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", + "expr": "max(cortex_inflight_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", "format": "time_series", "legendFormat": "highest", "legendLink": null @@ -229,19 +231,19 @@ "span": 3, "targets": [ { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}))", + "expr": "avg(sum by(pod) 
(cortex_tcp_connections{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}))", "format": "time_series", "legendFormat": "avg", "legendLink": null }, { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}))", + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"}))", "format": "time_series", "legendFormat": "highest", "legendLink": null }, { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", + "expr": "min(cortex_tcp_connections_limit{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?(distributor|ingester|mimir-write).*\"})", "format": "time_series", "legendFormat": "limit", "legendLink": null @@ -301,7 +303,7 @@ "span": 3, "targets": [ { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -349,7 +351,7 @@ "span": 3, "targets": [ { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -400,13 +402,13 @@ "span": 3, "targets": [ { - "expr": 
"avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"})", + "expr": "avg(cortex_inflight_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"})", "format": "time_series", "legendFormat": "avg", "legendLink": null }, { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"})", + "expr": "max(cortex_inflight_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"})", "format": "time_series", "legendFormat": "highest", "legendLink": null @@ -457,19 +459,19 @@ "span": 3, "targets": [ { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"}))", + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"}))", "format": "time_series", "legendFormat": "avg", "legendLink": null }, { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"}))", + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"}))", "format": "time_series", "legendFormat": "highest", "legendLink": null }, { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"})", + "expr": "min(cortex_tcp_connections_limit{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?distributor.*\"})", "format": "time_series", "legendFormat": "limit", "legendLink": null @@ -529,7 +531,7 @@ "span": 3, "targets": [ { - "expr": "sum by(pod) (rate(container_network_receive_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"}[$__rate_interval]))", + "expr": "sum by(pod) 
(rate(container_network_receive_bytes_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -577,7 +579,7 @@ "span": 3, "targets": [ { - "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_network_transmit_bytes_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -628,13 +630,13 @@ "span": 3, "targets": [ { - "expr": "avg(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"})", + "expr": "avg(cortex_inflight_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"})", "format": "time_series", "legendFormat": "avg", "legendLink": null }, { - "expr": "max(cortex_inflight_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"})", + "expr": "max(cortex_inflight_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"})", "format": "time_series", "legendFormat": "highest", "legendLink": null @@ -685,19 +687,19 @@ "span": 3, "targets": [ { - "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"}))", + "expr": "avg(sum by(pod) (cortex_tcp_connections{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"}))", "format": "time_series", "legendFormat": "avg", "legendLink": null }, { - "expr": "max(sum by(pod) (cortex_tcp_connections{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"}))", + "expr": "max(sum by(pod) (cortex_tcp_connections{cluster_id=~\"$cluster\", 
namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"}))", "format": "time_series", "legendFormat": "highest", "legendLink": null }, { - "expr": "min(cortex_tcp_connections_limit{cluster=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"})", + "expr": "min(cortex_tcp_connections_limit{cluster_id=~\"$cluster\", namespace=~\"$namespace\",pod=~\"(.*mimir-)?ingester.*\"})", "format": "time_series", "legendFormat": "limit", "legendLink": null @@ -718,7 +720,9 @@ "schemaVersion": 14, "style": "dark", "tags": [ - "mimir" + "owner:team-atlas", + "topic:observability", + "component:mimir" ], "templating": { "list": [ @@ -749,7 +753,7 @@ "multi": false, "name": "cluster", "options": [], - "query": "label_values(cortex_build_info, cluster)", + "query": "label_values(cortex_build_info, cluster_id)", "refresh": 1, "regex": "", "sort": 1, @@ -772,7 +776,7 @@ "multi": false, "name": "namespace", "options": [], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "query": "label_values(cortex_build_info{cluster_id=~\"$cluster\"}, namespace)", "refresh": 1, "regex": "", "sort": 1, @@ -815,6 +819,6 @@ }, "timezone": "utc", "title": "Mimir / Writes networking", - "uid": "978c1cb452585c96697a238eaac7fe2d", + "uid": "mimir-978c1cb452585c96697a238eaac7fe2d", "version": 0 } diff --git a/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-writes-resources.json b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-writes-resources.json index 27c772e6..bbab08c3 100644 --- a/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-writes-resources.json +++ b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-writes-resources.json @@ -21,7 +21,9 @@ "includeVars": true, "keepTime": true, "tags": [ - "mimir" + "owner:team-atlas", + "topic:observability", + "component:mimir" ], "targetBlank": false, "title": "Mimir dashboards", @@ -73,7 +75,7 @@ 
"span": 4, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -121,7 +123,7 @@ "span": 4, "targets": [ { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"})", + "expr": "max by(pod) (container_memory_working_set_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -169,7 +171,7 @@ "span": 4, "targets": [ { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"})", + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor|ingester|mimir-write\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -280,19 +282,19 @@ "span": 4, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"} / container_spec_cpu_period{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"distributor\"})", + "expr": "min(container_spec_cpu_quota{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"} / container_spec_cpu_period{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"})", "format": "time_series", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\",resource=\"cpu\"})", + "expr": "min(kube_pod_container_resource_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\",resource=\"cpu\"})", "format": "time_series", "legendFormat": "request", "legendLink": null @@ -391,19 +393,19 @@ "span": 4, "targets": [ { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"})", + "expr": "max by(pod) (container_memory_working_set_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"} > 0)", "format": "time_series", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\",resource=\"memory\"})", + "expr": "min(kube_pod_container_resource_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\",resource=\"memory\"})", "format": "time_series", "legendFormat": "request", "legendLink": null @@ -451,7 +453,7 @@ "span": 4, "targets": [ { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", 
namespace=~\"$namespace\",container=~\"distributor\"})", + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"distributor\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -511,7 +513,7 @@ "span": 6, "targets": [ { - "expr": "sum by(pod) (cortex_ingester_memory_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})", + "expr": "sum by(pod) (cortex_ingester_memory_series{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -610,19 +612,19 @@ "span": 6, "targets": [ { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"}[$__rate_interval]))", + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", + "expr": "min(container_spec_cpu_quota{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} / container_spec_cpu_period{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", "format": "time_series", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"cpu\"})", + "expr": "min(kube_pod_container_resource_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"cpu\"})", "format": "time_series", "legendFormat": "request", "legendLink": 
null @@ -733,19 +735,19 @@ "span": 4, "targets": [ { - "expr": "max by(pod) (container_memory_rss{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", + "expr": "max by(pod) (container_memory_rss{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} > 0)", "format": "time_series", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"memory\"})", + "expr": "min(kube_pod_container_resource_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"memory\"})", "format": "time_series", "legendFormat": "request", "legendLink": null @@ -844,19 +846,19 @@ "span": 4, "targets": [ { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", + "expr": "max by(pod) (container_memory_working_set_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null }, { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} > 0)", + "expr": "min(container_spec_memory_limit_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"} > 0)", "format": "time_series", "legendFormat": "limit", "legendLink": null }, { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"memory\"})", + "expr": 
"min(kube_pod_container_resource_requests{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\",resource=\"memory\"})", "format": "time_series", "legendFormat": "request", "legendLink": null @@ -904,7 +906,7 @@ "span": 4, "targets": [ { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\",container=~\"ingester\"})", "format": "time_series", "legendFormat": "{{pod}}", "legendLink": null @@ -964,7 +966,7 @@ "span": 4, "targets": [ { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"ingester\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", + "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_written_bytes_total[$__rate_interval]\n )\n)\n+\nignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster_id=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"ingester\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", "format": "time_series", "legendFormat": "{{pod}} - {{device}}", "legendLink": null @@ -1012,7 +1014,7 @@ "span": 4, "targets": [ { - "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"ingester\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n 
\"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", + "expr": "sum by(instance, pod, device) (\n rate(\n node_disk_read_bytes_total[$__rate_interval]\n )\n) + ignoring(pod) group_right() (\n label_replace(\n count by(\n instance,\n pod,\n device\n )\n (\n container_fs_writes_bytes_total{\n cluster_id=~\"$cluster\", namespace=~\"$namespace\",\n container=~\"ingester\",\n device!~\".*sda.*\"\n }\n ),\n \"device\",\n \"$1\",\n \"device\",\n \"/dev/(.*)\"\n ) * 0\n)\n\n", "format": "time_series", "legendFormat": "{{pod}} - {{device}}", "legendLink": null @@ -1063,7 +1065,7 @@ "span": 4, "targets": [ { - "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(ingester).*\"\n }\n)\n", + "expr": "max by(persistentvolumeclaim) (\n kubelet_volume_stats_used_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\"} /\n kubelet_volume_stats_capacity_bytes{cluster_id=~\"$cluster\", namespace=~\"$namespace\"}\n)\nand\ncount by(persistentvolumeclaim) (\n kube_persistentvolumeclaim_labels{\n cluster_id=~\"$cluster\", namespace=~\"$namespace\",\n label_name=~\"(ingester).*\"\n }\n)\n", "format": "time_series", "legendFormat": "{{persistentvolumeclaim}}", "legendLink": null @@ -1084,7 +1086,9 @@ "schemaVersion": 14, "style": "dark", "tags": [ - "mimir" + "owner:team-atlas", + "topic:observability", + "component:mimir" ], "templating": { "list": [ @@ -1115,7 +1119,7 @@ "multi": false, "name": "cluster", "options": [], - "query": "label_values(cortex_build_info, cluster)", + "query": "label_values(cortex_build_info, cluster_id)", "refresh": 1, "regex": "", "sort": 1, @@ -1138,7 +1142,7 @@ "multi": false, "name": "namespace", "options": [], - "query": 
"label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "query": "label_values(cortex_build_info{cluster_id=~\"$cluster\"}, namespace)", "refresh": 1, "regex": "", "sort": 1, @@ -1181,6 +1185,6 @@ }, "timezone": "utc", "title": "Mimir / Writes resources", - "uid": "bc9160e50b52e89e0e49c840fea3d379", + "uid": "mimir-bc9160e50b52e89e0e49c840fea3d379", "version": 0 } diff --git a/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-writes.json b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-writes.json index d6b2c773..6be013c5 100644 --- a/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-writes.json +++ b/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private/mimir-writes.json @@ -21,7 +21,9 @@ "includeVars": true, "keepTime": true, "tags": [ - "mimir" + "owner:team-atlas", + "topic:observability", + "component:mimir" ], "targetBlank": false, "title": "Mimir dashboards", @@ -90,7 +92,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(cluster_namespace_job:cortex_distributor_received_samples:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", + "expr": "sum(cluster_id_namespace_job:cortex_distributor_received_samples:rate5m{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", "format": "time_series", "instant": true, "refId": "A" @@ -166,7 +168,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", + "expr": "sum(cluster_id_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", "format": "time_series", "instant": true, "refId": "A" @@ -242,7 +244,7 @@ "steppedLine": false, "targets": [ { - "expr": 
"sum(cortex_ingester_memory_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n/ on(cluster, namespace) group_left\nmax by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}))\n", + "expr": "sum(cortex_ingester_memory_series{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n/ on(cluster_id, namespace) group_left\nmax by (cluster_id, namespace) (cortex_distributor_replication_factor{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}))\n", "format": "time_series", "instant": true, "refId": "A" @@ -318,7 +320,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(cortex_ingester_tsdb_exemplar_exemplars_in_storage{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n/ on(cluster, namespace) group_left\nmax by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}))\n", + "expr": "sum(cortex_ingester_tsdb_exemplar_exemplars_in_storage{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n/ on(cluster_id, namespace) group_left\nmax by (cluster_id, namespace) (cortex_distributor_replication_factor{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}))\n", "format": "time_series", "instant": true, "refId": "A" @@ -393,7 +395,7 @@ "steppedLine": false, "targets": [ { - "expr": "count(count by(user) (cortex_ingester_active_series{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}))", + "expr": "count(count by(user) (cortex_ingester_active_series{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}))", "format": "time_series", "instant": true, "refId": "A" @@ -623,7 
+625,7 @@ "span": 4, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "legendFormat": "{{status}}", "refId": "A" @@ -672,19 +674,19 @@ "span": 4, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", + "expr": "histogram_quantile(0.99, sum by (le) (cluster_id_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", "format": "time_series", "legendFormat": "99th percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 
1e3", + "expr": "histogram_quantile(0.50, sum by (le) (cluster_id_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})) * 1e3", "format": "time_series", "legendFormat": "50th percentile", "refId": "B" }, { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})", + "expr": "1e3 * sum(cluster_id_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}) / sum(cluster_id_job_route:cortex_request_duration_seconds_count:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"})", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -734,7 +736,7 @@ "targets": [ { "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval])))", + "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster_id=~\"$cluster\", 
job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", route=~\"/distributor.Distributor/Push|/httpgrpc.*|api_(v1|prom)_push|otlp_v1_metrics\"}[$__rate_interval])))", "format": "time_series", "legendFormat": "", "legendLink": null @@ -931,7 +933,7 @@ "span": 4, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",route=\"/cortex.Ingester/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",route=\"/cortex.Ingester/Push\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "legendFormat": "{{status}}", "refId": "A" @@ -980,19 +982,19 @@ "span": 4, "targets": [ { - "expr": "histogram_quantile(0.99, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"})) * 1e3", + "expr": "histogram_quantile(0.99, sum by (le) (cluster_id_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"})) * 1e3", "format": "time_series", "legendFormat": "99th percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum by (le) (cluster_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"})) * 1e3", + "expr": "histogram_quantile(0.50, sum 
by (le) (cluster_id_job_route:cortex_request_duration_seconds_bucket:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"})) * 1e3", "format": "time_series", "legendFormat": "50th percentile", "refId": "B" }, { - "expr": "1e3 * sum(cluster_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"}) / sum(cluster_job_route:cortex_request_duration_seconds_count:sum_rate{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"})", + "expr": "1e3 * sum(cluster_id_job_route:cortex_request_duration_seconds_sum:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"}) / sum(cluster_id_job_route:cortex_request_duration_seconds_count:sum_rate{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"})", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -1042,7 +1044,7 @@ "targets": [ { "exemplar": true, - "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"}[$__rate_interval])))", + "expr": "histogram_quantile(0.99, sum by(le, pod) (rate(cortex_request_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", route=\"/cortex.Ingester/Push\"}[$__rate_interval])))", "format": "time_series", "legendFormat": "", "legendLink": null @@ -1059,6 +1061,550 @@ "title": "Ingester", "titleSize": "h6" }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "description": "### Replicas\nThe maximum and current number of 
distributor replicas.\nNote: The current number of replicas can still show 1 replica even when scaled to 0.\nBecause HPA never reports 0 replicas, the query will report 0 only if the HPA is not active.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/Max .+/" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/Current .+/" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/Min .+/" + }, + "properties": [ + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + } + ] + } + ] + }, + "id": 13, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "max by (scaletargetref_name) (\n kube_horizontalpodautoscaler_spec_max_replicas{cluster_id=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-distributor\"}\n # Add the scaletargetref_name label for readability\n + on (cluster_id, namespace, horizontalpodautoscaler) group_left (scaletargetref_name)\n 0*kube_horizontalpodautoscaler_info{cluster_id=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-distributor\"}\n)\n", + "format": "time_series", + "legendFormat": "Max {{ scaletargetref_name }}", + "legendLink": null + }, + { + "expr": "max by (scaletargetref_name) (\n 
kube_horizontalpodautoscaler_status_current_replicas{cluster_id=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-distributor\"}\n # HPA doesn't go to 0 replicas, so we multiply by 0 if the HPA is not active\n * on (cluster_id, namespace, horizontalpodautoscaler)\n kube_horizontalpodautoscaler_status_condition{cluster_id=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-distributor\", condition=\"ScalingActive\", status=\"true\"}\n # Add the scaletargetref_name label for readability\n + on (cluster_id, namespace, horizontalpodautoscaler) group_left (scaletargetref_name)\n 0*kube_horizontalpodautoscaler_info{cluster_id=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-distributor\"}\n)\n", + "format": "time_series", + "legendFormat": "Current {{ scaletargetref_name }}", + "legendLink": null + }, + { + "expr": "max by (scaletargetref_name) (\n kube_horizontalpodautoscaler_spec_min_replicas{cluster_id=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-distributor\"}\n # Add the scaletargetref_name label for readability\n + on (cluster_id, namespace, horizontalpodautoscaler) group_left (scaletargetref_name)\n 0*kube_horizontalpodautoscaler_info{cluster_id=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-distributor\"}\n)\n", + "format": "time_series", + "legendFormat": "Min {{ scaletargetref_name }}", + "legendLink": null + } + ], + "title": "Replicas", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Scaling metric (CPU): Desired replicas\nThis panel shows the scaling metric exposed by KEDA divided by the target/threshold used.\nIt should represent the desired number of replicas, ignoring the min/max constraints applied later.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + 
"spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 14, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by (scaler) (\n label_replace(\n keda_scaler_metrics_value{cluster_id=~\"$cluster\", exported_namespace=~\"$namespace\", scaler=~\".*cpu.*\"},\n \"namespace\", \"$1\", \"exported_namespace\", \"(.*)\"\n )\n /\n on(cluster_id, namespace, scaledObject, metric) group_left label_replace(\n label_replace(\n kube_horizontalpodautoscaler_spec_target_metric{cluster_id=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-distributor\"},\n \"metric\", \"$1\", \"metric_name\", \"(.+)\"\n ),\n \"scaledObject\", \"$1\", \"horizontalpodautoscaler\", \"keda-hpa-(.*)\"\n )\n)\n", + "format": "time_series", + "legendFormat": "{{ scaler }}", + "legendLink": null + } + ], + "title": "Scaling metric (CPU): Desired replicas", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Scaling metric (memory): Desired replicas\nThis panel shows the scaling metric exposed by KEDA divided by the target/threshold used.\nIt should represent the desired number of replicas, ignoring the min/max constraints applied later.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 15, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by 
(scaler) (\n label_replace(\n keda_scaler_metrics_value{cluster_id=~\"$cluster\", exported_namespace=~\"$namespace\", scaler=~\".*memory.*\"},\n \"namespace\", \"$1\", \"exported_namespace\", \"(.*)\"\n )\n /\n on(cluster_id, namespace, scaledObject, metric) group_left label_replace(\n label_replace(\n kube_horizontalpodautoscaler_spec_target_metric{cluster_id=~\"$cluster\", namespace=~\"$namespace\", horizontalpodautoscaler=~\"keda-hpa-distributor\"},\n \"metric\", \"$1\", \"metric_name\", \"(.+)\"\n ),\n \"scaledObject\", \"$1\", \"horizontalpodautoscaler\", \"keda-hpa-(.*)\"\n )\n)\n", + "format": "time_series", + "legendFormat": "{{ scaler }}", + "legendLink": null + } + ], + "title": "Scaling metric (memory): Desired replicas", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "description": "### Autoscaler failures rate\nThe rate of failures in the KEDA custom metrics API server. Whenever an error occurs, the KEDA custom\nmetrics server is unable to query the scaling metric from Prometheus so the autoscaler wouldn't work properly.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "short" + }, + "overrides": [] + }, + "id": 16, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(cluster_id, namespace, scaler, metric, scaledObject) (\n label_replace(\n rate(keda_scaler_errors[$__rate_interval]),\n \"namespace\", \"$1\", \"exported_namespace\", \"(.+)\"\n )\n) +\non(cluster_id, namespace, metric, scaledObject) group_left\nlabel_replace(\n label_replace(\n kube_horizontalpodautoscaler_spec_target_metric{cluster_id=~\"$cluster\", namespace=~\"$namespace\", 
horizontalpodautoscaler=~\"keda-hpa-distributor\"} * 0,\n \"scaledObject\", \"$1\", \"horizontalpodautoscaler\", \"keda-hpa-(.*)\"\n ),\n \"metric\", \"$1\", \"metric_name\", \"(.+)\"\n)\n", + "format": "time_series", + "legendFormat": "{{scaler}} failures", + "legendLink": null + } + ], + "title": "Autoscaler failures rate", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Distributor - autoscaling", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "1xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EAB839", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "2xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "3xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#6ED0E0", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "4xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#EF843C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "5xx" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "OK" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + 
] + }, + { + "matcher": { + "id": "byName", + "options": "cancel" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#A9A9A9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "error" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E24D42", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "success" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#7EB26D", + "mode": "fixed" + } + } + ] + } + ] + }, + "id": 17, + "links": [], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "format": "time_series", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Requests / sec", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "id": 18, + "links": [], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 6, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster_id=~\"$cluster\", 
job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-hatracker\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Latency", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Distributor - key-value store for high-availability (HA) deduplication", + "titleSize": "h6" + }, { "collapse": false, "height": "250px", @@ -1238,7 +1784,7 @@ "span": 6, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", 
\"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "legendFormat": "{{status}}", "refId": "A" @@ -1287,19 +1833,19 @@ "span": 6, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) * 1e3 / 
sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\", kv_name=~\"distributor-(lifecycler|ring)\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -1513,7 +2059,7 @@ "span": 6, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(cortex_kv_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-zA-Z]+)\"))\n", "format": "time_series", "legendFormat": "{{status}}", "refId": "A" @@ -1562,19 +2108,19 @@ "span": 6, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster_id=~\"$cluster\", 
job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(cortex_kv_request_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_kv_request_duration_seconds_sum{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_kv_request_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\", kv_name=~\"ingester-.*\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -1684,13 +2230,13 @@ "span": 6, "targets": [ { - "expr": "sum(rate(cortex_ingester_shipper_uploads_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) - sum(rate(cortex_ingester_shipper_upload_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", + 
"expr": "sum(rate(cortex_ingester_shipper_uploads_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) - sum(rate(cortex_ingester_shipper_upload_failures_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "successful", "legendLink": null }, { - "expr": "sum(rate(cortex_ingester_shipper_upload_failures_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_ingester_shipper_upload_failures_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "failed", "legendLink": null @@ -1740,19 +2286,19 @@ "span": 6, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",component=\"ingester\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",component=\"ingester\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",component=\"ingester\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(thanos_objstore_bucket_operation_duration_seconds_bucket{cluster_id=~\"$cluster\", 
job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",component=\"ingester\",operation=\"upload\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",component=\"ingester\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",component=\"ingester\",operation=\"upload\"}[$__rate_interval]))", + "expr": "sum(rate(thanos_objstore_bucket_operation_duration_seconds_sum{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",component=\"ingester\",operation=\"upload\"}[$__rate_interval])) * 1e3 / sum(rate(thanos_objstore_bucket_operation_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\",component=\"ingester\",operation=\"upload\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -1862,13 +2408,13 @@ "span": 6, "targets": [ { - "expr": "sum(rate(cortex_ingester_tsdb_compactions_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_ingester_tsdb_compactions_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "successful", "legendLink": null }, { - "expr": "sum(rate(cortex_ingester_tsdb_compactions_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_ingester_tsdb_compactions_failed_total{cluster_id=~\"$cluster\", 
job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "failed", "legendLink": null @@ -1918,19 +2464,19 @@ "span": 6, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(cortex_ingester_tsdb_compaction_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(cortex_ingester_tsdb_compaction_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "99th Percentile", "refId": "A" }, { - "expr": "histogram_quantile(0.50, sum(rate(cortex_ingester_tsdb_compaction_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) by (le)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(cortex_ingester_tsdb_compaction_duration_seconds_bucket{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) by (le)) * 1e3", "format": "time_series", "legendFormat": "50th Percentile", "refId": "B" }, { - "expr": "sum(rate(cortex_ingester_tsdb_compaction_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_ingester_tsdb_compaction_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_ingester_tsdb_compaction_duration_seconds_sum{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_ingester_tsdb_compaction_duration_seconds_count{cluster_id=~\"$cluster\", 
job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "Average", "refId": "C" @@ -2040,13 +2586,13 @@ "span": 3, "targets": [ { - "expr": "sum(rate(cortex_ingester_tsdb_wal_truncations_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) - sum(rate(cortex_ingester_tsdb_wal_truncations_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_ingester_tsdb_wal_truncations_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) - sum(rate(cortex_ingester_tsdb_wal_truncations_failed_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "successful", "legendLink": null }, { - "expr": "sum(rate(cortex_ingester_tsdb_wal_truncations_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_ingester_tsdb_wal_truncations_failed_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "failed", "legendLink": null @@ -2126,13 +2672,13 @@ "span": 3, "targets": [ { - "expr": "sum(rate(cortex_ingester_tsdb_checkpoint_creations_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) - sum(rate(cortex_ingester_tsdb_checkpoint_creations_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_ingester_tsdb_checkpoint_creations_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) - 
sum(rate(cortex_ingester_tsdb_checkpoint_creations_failed_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "successful", "legendLink": null }, { - "expr": "sum(rate(cortex_ingester_tsdb_checkpoint_creations_failed_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_ingester_tsdb_checkpoint_creations_failed_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "failed", "legendLink": null @@ -2182,7 +2728,7 @@ "span": 3, "targets": [ { - "expr": "sum(rate(cortex_ingester_tsdb_wal_truncate_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))\n/\nsum(rate(cortex_ingester_tsdb_wal_truncate_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) >= 0\n", + "expr": "sum(rate(cortex_ingester_tsdb_wal_truncate_duration_seconds_sum{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))\n/\nsum(rate(cortex_ingester_tsdb_wal_truncate_duration_seconds_count{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval])) >= 0\n", "format": "time_series", "legendFormat": "avg", "legendLink": null @@ -2262,13 +2808,13 @@ "span": 3, "targets": [ { - "expr": "sum(rate(cortex_ingester_tsdb_wal_corruptions_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_ingester_tsdb_wal_corruptions_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", "format": "time_series", "legendFormat": 
"WAL", "legendLink": null }, { - "expr": "sum(rate(cortex_ingester_tsdb_mmap_chunk_corruptions_total{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", + "expr": "sum(rate(cortex_ingester_tsdb_mmap_chunk_corruptions_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "mmap-ed chunks", "legendLink": null @@ -2329,7 +2875,7 @@ "span": 3, "targets": [ { - "expr": "sum(cluster_namespace_job:cortex_distributor_exemplars_in:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", + "expr": "sum(cluster_id_namespace_job:cortex_distributor_exemplars_in:rate5m{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", "format": "time_series", "legendFormat": "incoming exemplars", "legendLink": null @@ -2378,7 +2924,7 @@ "span": 3, "targets": [ { - "expr": "sum(cluster_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", + "expr": "sum(cluster_id_namespace_job:cortex_distributor_received_exemplars:rate5m{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})", "format": "time_series", "legendFormat": "received exemplars", "legendLink": null @@ -2427,7 +2973,7 @@ "span": 3, "targets": [ { - "expr": "sum(\n cluster_namespace_job:cortex_ingester_ingested_exemplars:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", + "expr": "sum(\n cluster_id_namespace_job:cortex_ingester_ingested_exemplars:rate5m{cluster_id=~\"$cluster\", 
job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster_id, namespace) group_left\n max by (cluster_id, namespace) (cortex_distributor_replication_factor{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", "format": "time_series", "legendFormat": "ingested exemplars", "legendLink": null @@ -2476,7 +3022,7 @@ "span": 3, "targets": [ { - "expr": "sum(\n cluster_namespace_job:cortex_ingester_tsdb_exemplar_exemplars_appended:rate5m{cluster=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster, namespace) group_left\n max by (cluster, namespace) (cortex_distributor_replication_factor{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", + "expr": "sum(\n cluster_id_namespace_job:cortex_ingester_tsdb_exemplar_exemplars_appended:rate5m{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}\n / on(cluster_id, namespace) group_left\n max by (cluster_id, namespace) (cortex_distributor_replication_factor{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"})\n)\n", "format": "time_series", "legendFormat": "appended exemplars", "legendLink": null @@ -2536,7 +3082,7 @@ "span": 6, "targets": [ { - "expr": "sum by (reason) (rate(cortex_distributor_instance_rejected_requests_total{cluster=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", + "expr": "sum by (reason) (rate(cortex_distributor_instance_rejected_requests_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((distributor.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{reason}}", "legendLink": null @@ -2584,7 +3130,7 @@ "span": 6, "targets": [ { - "expr": "sum by (reason) (rate(cortex_ingester_instance_rejected_requests_total{cluster=~\"$cluster\", 
job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", + "expr": "sum by (reason) (rate(cortex_ingester_instance_rejected_requests_total{cluster_id=~\"$cluster\", job=~\"($namespace)/((ingester.*|cortex|mimir|mimir-write.*))\"}[$__rate_interval]))", "format": "time_series", "legendFormat": "{{reason}}", "legendLink": null @@ -2605,7 +3151,9 @@ "schemaVersion": 14, "style": "dark", "tags": [ - "mimir" + "owner:team-atlas", + "topic:observability", + "component:mimir" ], "templating": { "list": [ @@ -2637,7 +3185,7 @@ "multi": true, "name": "cluster", "options": [], - "query": "label_values(cortex_build_info, cluster)", + "query": "label_values(cortex_build_info, cluster_id)", "refresh": 1, "regex": "", "sort": 1, @@ -2661,7 +3209,7 @@ "multi": true, "name": "namespace", "options": [], - "query": "label_values(cortex_build_info{cluster=~\"$cluster\"}, namespace)", + "query": "label_values(cortex_build_info{cluster_id=~\"$cluster\"}, namespace)", "refresh": 1, "regex": "", "sort": 1, @@ -2704,6 +3252,6 @@ }, "timezone": "utc", "title": "Mimir / Writes", - "uid": "8280707b8f16e7b87b840fc1cc92d4c5", + "uid": "mimir-8280707b8f16e7b87b840fc1cc92d4c5", "version": 0 } diff --git a/loki/README.md b/loki/README.md index 55e09b25..8464b0eb 100644 --- a/loki/README.md +++ b/loki/README.md @@ -78,6 +78,7 @@ There's been some extra changes done interactively with Grafana UI. 
* Added `disk usage` to Write and Backend path panel * Added `total pods` to Write, Read and Backend panels + # Loki canary This one was generated from the [mixins](https://github.com/grafana/loki/tree/main/production/loki-mixin) following these steps: diff --git a/loki/mixin.libsonnet b/loki/mixin.libsonnet index 0b38cb6e..81c4fafd 100644 --- a/loki/mixin.libsonnet +++ b/loki/mixin.libsonnet @@ -11,7 +11,7 @@ loki{ per_cluster_label: 'cluster_id', canary+: { - enabled: true, + enabled: true, import not found: 'loki-mixin/mixin-ssd.libsonnet' }, }, } diff --git a/mimir/.gitignore b/mimir/.gitignore new file mode 100644 index 00000000..123ed3ee --- /dev/null +++ b/mimir/.gitignore @@ -0,0 +1,6 @@ +vendor/ +dashboards_out/ +alerts.yaml +rules.yaml +jsonnetfile.* + diff --git a/mimir/mixin.libsonnet b/mimir/mixin.libsonnet new file mode 100644 index 00000000..d0137a83 --- /dev/null +++ b/mimir/mixin.libsonnet @@ -0,0 +1,13 @@ +local mimir = import 'mimir-mixin/mixin-compiled.libsonnet'; + +mimir{ + _config+:: { + tags: [ + "owner:team-atlas", + "topic:observability", + "component:mimir" + ], + + per_cluster_label: 'cluster_id', + }, +} diff --git a/mimir/update.sh b/mimir/update.sh new file mode 100755 index 00000000..84bd564c --- /dev/null +++ b/mimir/update.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +# Update Mimir mixins from upstream +# +# This script is used to update the Mimir mixins from the upstream repository. 
+# +# Usage: +# ./mimir/update.sh from the root of the repository + +set -e + +BRANCH="main" +MIXIN_URL=https://github.com/grafana/mimir/operations/mimir-mixin@$BRANCH +helmDir="$(pwd)/helm/dashboards/charts/private_dashboards_mz/dashboards/shared/private" + +cd mimir +rm -rf vendor jsonnetfile.* + +jb init +jb install $MIXIN_URL +mixtool generate all mixin.libsonnet + +for file in dashboards_out/*; do + # Process each file here + echo "$file" + + # adds mimir- prefix to uid + jq '.uid = "mimir-" + .uid' "$file" > "$file.out" && mv "$file.out" "$file" + + echo "Copying dashboard to $helmDir" + cp "$file" "$helmDir" +done