From 05bd60a6becbedf7c36ed29abd0bcf6d777dd15c Mon Sep 17 00:00:00 2001 From: Raihan Khan Date: Sat, 16 Mar 2024 11:04:51 +0600 Subject: [PATCH] Add summary dashboard alerts for kafka and rabbitmq (#937) Signed-off-by: raihankhan --- .../dashboards/kafka/kafka-summary.json | 982 +++++++++++++++--- .../dashboards/rabbitmq/rabbitmq-summary.json | 674 ++++++++++++ 2 files changed, 1518 insertions(+), 138 deletions(-) diff --git a/charts/kubedb-grafana-dashboards/dashboards/kafka/kafka-summary.json b/charts/kubedb-grafana-dashboards/dashboards/kafka/kafka-summary.json index 7f306fba9..09fea9d8f 100644 --- a/charts/kubedb-grafana-dashboards/dashboards/kafka/kafka-summary.json +++ b/charts/kubedb-grafana-dashboards/dashboards/kafka/kafka-summary.json @@ -31,6 +31,10 @@ { "collapsed": false, "datasource": "${datasource}", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, "gridPos": { "h": 1, "w": 24, @@ -115,7 +119,7 @@ "text": {}, "textMode": "auto" }, - "pluginVersion": "7.5.5", + "pluginVersion": "8.2.3", "targets": [ { "exemplar": true, @@ -124,7 +128,7 @@ "instant": true, "interval": "", "legendFormat": {{ `"{{phase}}"` }}, - "refId": "A" + "refId": "A" } ], "title": "Database Status", @@ -197,7 +201,7 @@ }, "textMode": "name" }, - "pluginVersion": "7.5.5", + "pluginVersion": "8.2.3", "targets": [ { "exemplar": true, @@ -207,7 +211,7 @@ "interval": "", "intervalFactor": 1, "legendFormat": {{ `"{{version}}"` }}, - "refId": "A" + "refId": "A" } ], "title": "Version", @@ -215,43 +219,24 @@ }, { "datasource": "${datasource}", - "description": "When this option is enabled, connections attempted using insecure transport will be rejected.", + "description": "KubeDB Kafka Total Memory Limit", "fieldConfig": { "defaults": { "color": { - "mode": "thresholds" + "fixedColor": "text", + "mode": "fixed" }, - "mappings": [ - { - "from": "", - "id": 1, - "text": "True", - "to": "", - "type": 1, - "value": "1" - }, - { - "from": "", - "id": 2, - "text": "False", - "to": "", 
- "type": 1, - "value": "0" - } - ], + "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null - }, - { - "color": "red", - "value": 80 } ] - } + }, + "unit": "bytes" }, "overrides": [] }, @@ -261,7 +246,7 @@ "x": 16, "y": 1 }, - "id": 82, + "id": 116, "options": { "colorMode": "none", "graphMode": "area", @@ -275,23 +260,20 @@ "values": false }, "text": {}, - "textMode": "value" + "textMode": "auto" }, - "pluginVersion": "7.5.5", + "pluginVersion": "8.2.3", "targets": [ { "exemplar": true, - "expr": "kubedb_com_kafka_info{namespace=\"$namespace\", app=\"$app\"}", - "format": "time_series", + "expr": "kubedb_com_kafka_resource_limit_memory{namespace=\"$namespace\", app=\"$app\"}", "instant": true, "interval": "", - "legendFormat": {{ `"{{requireSSL}}"` }}, + "legendFormat": "", "refId": "A" } ], - "timeFrom": null, - "timeShift": null, - "title": "Require Secure Transport", + "title": "Memory Limit", "type": "stat" }, { @@ -359,7 +341,7 @@ "text": {}, "textMode": "value" }, - "pluginVersion": "7.5.5", + "pluginVersion": "8.2.3", "targets": [ { "exemplar": true, @@ -368,7 +350,7 @@ "instant": true, "interval": "", "legendFormat": {{ `"{{terminationPolicy}}"` }}, - "refId": "A" + "refId": "A" } ], "title": "Termination Policy", @@ -422,7 +404,7 @@ "text": {}, "textMode": "auto" }, - "pluginVersion": "7.5.5", + "pluginVersion": "8.2.3", "targets": [ { "exemplar": true, @@ -481,7 +463,7 @@ "text": {}, "textMode": "auto" }, - "pluginVersion": "7.5.5", + "pluginVersion": "8.2.3", "targets": [ { "exemplar": true, @@ -539,7 +521,7 @@ "text": {}, "textMode": "auto" }, - "pluginVersion": "7.5.5", + "pluginVersion": "8.2.3", "targets": [ { "exemplar": true, @@ -599,7 +581,7 @@ "text": {}, "textMode": "auto" }, - "pluginVersion": "7.5.5", + "pluginVersion": "8.2.3", "targets": [ { "exemplar": true, @@ -615,24 +597,38 @@ }, { "datasource": "${datasource}", - "description": "KubeDB Kafka Total Memory Limit", + "description": "When this 
option is enabled, connections attempted using insecure transport will be rejected.", "fieldConfig": { "defaults": { "color": { - "fixedColor": "text", - "mode": "fixed" + "mode": "thresholds" }, - "mappings": [], + "mappings": [ + { + "options": { + "0": { + "text": "False" + }, + "1": { + "text": "True" + } + }, + "type": "value" + } + ], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null + }, + { + "color": "red", + "value": 80 } ] - }, - "unit": "bytes" + } }, "overrides": [] }, @@ -642,7 +638,7 @@ "x": 16, "y": 4 }, - "id": 116, + "id": 82, "options": { "colorMode": "none", "graphMode": "area", @@ -656,20 +652,23 @@ "values": false }, "text": {}, - "textMode": "auto" + "textMode": "value" }, - "pluginVersion": "7.5.5", + "pluginVersion": "8.2.3", "targets": [ { "exemplar": true, - "expr": "kubedb_com_kafka_resource_limit_memory{namespace=\"$namespace\", app=\"$app\"}", + "expr": "kubedb_com_kafka_info{namespace=\"$namespace\", app=\"$app\"}", + "format": "time_series", "instant": true, "interval": "", - "legendFormat": "", - "refId": "A" + "legendFormat": {{ `"{{requireSSL}}"` }}, + "refId": "A" } ], - "title": "Memory Limit", + "timeFrom": null, + "timeShift": null, + "title": "Require Secure Transport", "type": "stat" }, { @@ -718,7 +717,7 @@ "text": {}, "textMode": "auto" }, - "pluginVersion": "7.5.5", + "pluginVersion": "8.2.3", "targets": [ { "exemplar": true, @@ -729,17 +728,423 @@ "refId": "A" } ], - "title": "Storage Request", - "type": "stat" + "title": "Storage Request", + "type": "stat" + }, + {{- if $alerts }} + { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 0 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "1h", + "now" + ] + }, + "reducer": { + "params": [], + "type": "last" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "10s", + "frequency": "10s", + "handler": 1, + "name": "Kafka Down 
alert", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 9, + "x": 0, + "y": 7 + }, + "hiddenSeries": false, + "id": 158, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.3", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "kubedb_com_kafka_status_phase{app=\"$app\",namespace=\"$namespace\",phase=\"NotReady\"}", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0, + "visible": true + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Kafka Down", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:88", + "format": "short", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:89", + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "alert": { + "alertRuleTags": { + "kdb": "" + }, + "conditions": [ + { + "evaluator": { + "params": [ + 0 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "1h", + "now" + ] + }, + "reducer": { + "params": [], + "type": "last" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "10s", + "frequency": "10s", + 
"handler": 1, + "name": "KafkaPhaseCritical", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 9, + "y": 7 + }, + "hiddenSeries": false, + "id": 156, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.3", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "kubedb_com_kafka_status_phase{app=\"$app\", namespace=\"$namespace\",phase=\"Critical\"}", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0, + "visible": true + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Kafka Critical Phase", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:70", + "format": "short", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:71", + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 0 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "1m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "10s", 
+ "frequency": "10s", + "handler": 1, + "name": "Kafka Under Replicated Partitions alert", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Kafka under replicated partitions", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 7, + "x": 17, + "y": 7 + }, + "hiddenSeries": false, + "id": 160, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.3", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "exemplar": true, + "expr": "kafka_server_replicamanager_underreplicatedpartitions{job=~\"$app-stats\",namespace=\"$namespace\"}", + "instant": false, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0, + "visible": true + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Kafka Under Replicated Partitions", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, + {{- end }} { "collapsed": false, "datasource": "${datasource}", + 
"fieldConfig": { + "defaults": {}, + "overrides": [] + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 7 + "y": 15 }, "id": 138, "panels": [], @@ -753,17 +1158,13 @@ "dashes": false, "datasource": "${datasource}", "description": "CPU Usage by Kafka pods", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 24, "x": 0, - "y": 8 + "y": 16 }, "hiddenSeries": false, "id": 98, @@ -783,7 +1184,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.5", + "pluginVersion": "8.2.3", "pointradius": 2, "points": false, "renderer": "flot", @@ -797,7 +1198,7 @@ "expr": "sum(sum by (namespace, pod, container) (irate(container_cpu_usage_seconds_total{image!=\"\",job=\"kubelet\",metrics_path=\"/metrics/cadvisor\",namespace=\"$namespace\", pod=~\"$app-.+$\"}[5m])) * on (namespace, pod) group_left (node) topk by (namespace, pod) (1, max by (namespace, pod, node) (kube_pod_info{node!=\"\",namespace=\"$namespace\", pod=~\"$app-.+$\"}))) by (pod)", "interval": "", "legendFormat": {{ `"{{pod}}"` }}, - "refId": "A" + "refId": "A" } ], "thresholds": [], @@ -841,6 +1242,152 @@ "alignLevel": null } }, + {{- if $alerts }} + { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 0.8 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "1h", + "now" + ] + }, + "reducer": { + "params": [], + "type": "last" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "10s", + "frequency": "30s", + "handler": 1, + "name": "CPU Usage Percentage Alert", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "CPU Usage Percentage by Pgpool pods", + "fieldConfig": { + "defaults": { + "unit": "percentunit" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 24, + 
"x": 0, + "y": 23 + }, + "hiddenSeries": false, + "id": 154, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.3", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(sum by (namespace, pod, container) (irate(container_cpu_usage_seconds_total{image!=\"\",job=\"kubelet\",metrics_path=\"/metrics/cadvisor\",namespace=\"$namespace\", pod=~\"$app-.+$\"}[5m])) * on (namespace, pod) group_left (node) topk by (namespace, pod) (1, max by (namespace, pod, node) (kube_pod_info{node!=\"\",namespace=\"$namespace\", pod=~\"$app-.+$\"}))) by (pod) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\",resource=\"cpu\",namespace=\"$namespace\", pod=~\"$app-.+$\"} * on (namespace,pod) group_left () max by (namespace,pod) ((kube_pod_status_phase{phase=~\"Pending|Running\"} == 1))) by (pod)", + "interval": "", + "intervalFactor": 1, + "legendFormat": {{ `"{{pod}}"` }}, + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0.8, + "visible": true + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU Usage Percentage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": 
true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + {{- end }} { "datasource": "${datasource}", "description": "CPU Quote information in details", @@ -1037,7 +1584,7 @@ { "targetBlank": false, "title": "Drill down", - "url": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell" + "url": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell" } ] }, @@ -1053,7 +1600,7 @@ "h": 6, "w": 24, "x": 0, - "y": 15 + "y": 30 }, "id": 122, "links": [], @@ -1061,7 +1608,7 @@ "showHeader": true, "sortBy": [] }, - "pluginVersion": "7.5.5", + "pluginVersion": "8.2.3", "targets": [ { "exemplar": true, @@ -1136,11 +1683,15 @@ { "collapsed": false, "datasource": "${datasource}", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 21 + "y": 36 }, "id": 140, "panels": [], @@ -1166,7 +1717,7 @@ "h": 6, "w": 24, "x": 0, - "y": 22 + "y": 37 }, "hiddenSeries": false, "id": 100, @@ -1186,7 +1737,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.5", + "pluginVersion": "8.2.3", "pointradius": 2, "points": false, "renderer": "flot", @@ -1200,7 +1751,7 @@ "expr": "sum(container_memory_working_set_bytes{image!=\"\",job=\"kubelet\",metrics_path=\"/metrics/cadvisor\",namespace=\"$namespace\", pod=~\"$app-.+$\",container!=\"\"} * on (namespace, pod) group_left (node) topk by (namespace, pod) (1, max by (namespace, pod, node) (kube_pod_info{node!=\"\"}))) by (pod)", "interval": "", "legendFormat": {{ `"{{pod}}"` }}, - "refId": "A" + "refId": "A" } ], "thresholds": [], @@ -1244,6 +1795,143 @@ "alignLevel": null } }, + {{- if $alerts }} + { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 0.8 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ +
"A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "last" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "5m", + "frequency": "1m", + "handler": 1, + "name": "Memory Usage Percentage", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 43 + }, + "hiddenSeries": false, + "id": 162, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.3", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(container_memory_working_set_bytes{image!=\"\",job=\"kubelet\",metrics_path=\"/metrics/cadvisor\",namespace=\"$namespace\", pod=~\"$app-.+$\",container!=\"\"} * on (namespace, pod) group_left (node) topk by (namespace, pod) (1, max by (namespace, pod, node) (kube_pod_info{node!=\"\"}))) by (pod) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\",resource=\"memory\",namespace=\"$namespace\", pod=~\"$app-.+$\"} * on (namespace,pod) group_left () max by (namespace,pod) ((kube_pod_status_phase{phase=~\"Pending|Running\"} == 1))) by (pod)", + "interval": "", + "legendFormat": {{ `"{{pod}}"` }}, + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0.8, + "visible": true + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory Usage Percentage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + 
}, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + {{- end }} { "datasource": "${datasource}", "fieldConfig": { @@ -1507,7 +2195,7 @@ { "targetBlank": false, "title": "Drill down", - "url": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell" + "url": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell" } ] }, @@ -1523,7 +2211,7 @@ "h": 6, "w": 24, "x": 0, - "y": 28 + "y": 50 }, "id": 124, "links": [], @@ -1536,7 +2224,7 @@ } ] }, - "pluginVersion": "7.5.5", + "pluginVersion": "8.2.3", "targets": [ { "exemplar": true, @@ -1643,11 +2331,15 @@ { "collapsed": false, "datasource": "${datasource}", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 34 + "y": 56 }, "id": 142, "panels": [], @@ -1673,7 +2365,7 @@ "h": 9, "w": 12, "x": 0, - "y": 35 + "y": 57 }, "hiddenSeries": false, "id": 112, @@ -1693,7 +2385,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.5", + "pluginVersion": "8.2.3", "pointradius": 2, "points": false, "renderer": "flot", @@ -1707,7 +2399,7 @@ "expr": "avg(container_blkio_device_usage_total{namespace=\"$namespace\", pod=~\"$app-.+$\"}) by (pod)", "interval": "", "legendFormat": {{ `"{{pod}}"` }}, - "refId": "A" + "refId": "A" } ], "thresholds": [], @@ -1770,7 +2462,7 @@ "h": 9, "w": 12, "x": 12, - "y": 35 + "y": 57 }, "hiddenSeries": false, "id": 108, @@ -1790,7 +2482,7 @@ "alertThreshold": true }, "percentage": false, -
"pluginVersion": "7.5.5", + "pluginVersion": "8.2.3", "pointradius": 2, "points": false, "renderer": "flot", @@ -1806,7 +2498,7 @@ "interval": "", "intervalFactor": 1, "legendFormat": {{ `"{{pod}}-disk-write"` }}, - "refId": "A" + "refId": "A" }, { "exemplar": true, @@ -1815,7 +2507,7 @@ "instant": false, "interval": "", "legendFormat": {{ `"{{pod}}-disk-read"` }}, - "refId": "B" + "refId": "B" } ], "thresholds": [], @@ -1877,7 +2569,7 @@ "h": 7, "w": 12, "x": 0, - "y": 44 + "y": 66 }, "hiddenSeries": false, "id": 126, @@ -1898,7 +2590,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.5", + "pluginVersion": "8.2.3", "pointradius": 2, "points": false, "renderer": "flot", @@ -1909,14 +2601,14 @@ "targets": [ { "exemplar": true, - "expr": "ceil(sum by(pod) (rate(container_fs_reads_total{container!=\"\", namespace=~\"$namespace\", pod=~\"$app-.+$\"}[5m]) + rate(container_fs_writes_total{container!=\"\" ,namespace=~\"$namespace\", pod=~\"$app-.+$\"}[5m])))", + "expr": "ceil(sum by(pod) (rate(container_fs_reads_total{container!=\"\", namespace=\"$namespace\", pod=~\"$app-.+$\"}[5m]) + rate(container_fs_writes_total{container!=\"\" ,namespace=\"$namespace\", pod=~\"$app-.+$\"}[5m])))", "format": "time_series", "interval": "", "intervalFactor": 2, "legendFormat": {{ `"{{pod}}"` }}, - "legendLink": null, - "refId": "A", - "step": 10 + "legendLink": null, + "refId": "A", + "step": 10 } ], "thresholds": [], @@ -1978,7 +2670,7 @@ "h": 7, "w": 12, "x": 12, - "y": 44 + "y": 66 }, "hiddenSeries": false, "id": 128, @@ -1999,7 +2691,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.5", + "pluginVersion": "8.2.3", "pointradius": 2, "points": false, "renderer": "flot", @@ -2010,14 +2702,14 @@ "targets": [ { "exemplar": true, - "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", namespace=~\"$namespace\", pod=~\"$app-.+$\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\" ,namespace=~\"$namespace\", 
pod=~\"$app-.+$\"}[5m]))", + "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", namespace=\"$namespace\", pod=~\"$app-.+$\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\" ,namespace=\"$namespace\", pod=~\"$app-.+$\"}[5m]))", "format": "time_series", "interval": "", "intervalFactor": 2, "legendFormat": {{ `"{{pod}}"` }}, - "legendLink": null, - "refId": "A", - "step": 10 + "legendLink": null, + "refId": "A", + "step": 10 } ], "thresholds": [], @@ -2276,7 +2968,7 @@ { "targetBlank": false, "title": "Drill down to pods", - "url": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell" + "url": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell" } ] }, @@ -2292,18 +2984,18 @@ "h": 7, "w": 24, "x": 0, - "y": 51 + "y": 73 }, "id": 130, "links": [], "options": { "showHeader": true }, - "pluginVersion": "7.5.5", + "pluginVersion": "8.2.3", "targets": [ { "exemplar": true, - "expr": "sum by(pod) (rate(container_fs_reads_total{container!=\"\", pod=~\"$app-.+$\",namespace=~\"$namespace\"}[5m]))", + "expr": "sum by(pod) (rate(container_fs_reads_total{container!=\"\", pod=~\"$app-.+$\",namespace=\"$namespace\"}[5m]))", "format": "table", "instant": true, "interval": "", @@ -2314,7 +3006,7 @@ }, { "exemplar": true, - "expr": "sum by(pod) (rate(container_fs_writes_total{container!=\"\", pod=~\"$app-.+$\",namespace=~\"$namespace\"}[5m]))", + "expr": "sum by(pod) (rate(container_fs_writes_total{container!=\"\", pod=~\"$app-.+$\",namespace=\"$namespace\"}[5m]))", "format": "table", "instant": true, "interval": "", @@ -2325,7 +3017,7 @@ }, { "exemplar": true, - "expr": "sum by(pod) (rate(container_fs_reads_total{container!=\"\", pod=~\"$app-.+$\",namespace=~\"$namespace\"}[5m]) +
rate(container_fs_writes_total{container!=\"\",pod=~\"$app-.+$\",namespace=~\"$namespace\"}[5m]))", + "expr": "sum by(pod) (rate(container_fs_reads_total{container!=\"\", pod=~\"$app-.+$\",namespace=\"$namespace\"}[5m]) + rate(container_fs_writes_total{container!=\"\",pod=~\"$app-.+$\",namespace=\"$namespace\"}[5m]))", "format": "table", "instant": true, "interval": "", @@ -2336,7 +3028,7 @@ }, { "exemplar": true, - "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", pod=~\"$app-.+$\",namespace=~\"$namespace\"}[5m]))", + "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", pod=~\"$app-.+$\",namespace=\"$namespace\"}[5m]))", "format": "table", "instant": true, "interval": "", @@ -2347,7 +3039,7 @@ }, { "exemplar": true, - "expr": "sum by(pod) (rate(container_fs_writes_bytes_total{container!=\"\", pod=~\"$app-.+$\",namespace=~\"$namespace\"}[5m]))", + "expr": "sum by(pod) (rate(container_fs_writes_bytes_total{container!=\"\", pod=~\"$app-.+$\",namespace=\"$namespace\"}[5m]))", "format": "table", "instant": true, "interval": "", @@ -2358,7 +3050,7 @@ }, { "exemplar": true, - "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", pod=~\"$app-.+$\",namespace=~\"$namespace\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\",pod=~\"$app-.+$\",namespace=~\"$namespace\"}[5m]))", + "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", pod=~\"$app-.+$\",namespace=\"$namespace\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\",pod=~\"$app-.+$\",namespace=\"$namespace\"}[5m]))", "format": "table", "instant": true, "interval": "", @@ -2384,11 +3076,15 @@ { "collapsed": false, "datasource": "${datasource}", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 58 + "y": 80 }, "id": 152, "panels": [], @@ -2426,17 +3122,17 @@ "h": 6, "w": 12, "x": 0, - "y": 59 + "y": 81 }, "id": 148, "options": { "showHeader": true }, - 
"pluginVersion": "7.5.5", + "pluginVersion": "8.2.3", "targets": [ { "exemplar": true, - "expr": "kubelet_volume_stats_capacity_bytes + on(persistentvolumeclaim) group_left(pod) kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~\"$app-.+$\",namespace=~\"$namespace\"}", + "expr": "kubelet_volume_stats_capacity_bytes + on(persistentvolumeclaim) group_left(pod) kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~\"$app-.+$\",namespace=\"$namespace\"}", "format": "table", "instant": true, "interval": "", @@ -2479,6 +3175,8 @@ "mode": "thresholds" }, "mappings": [], + "max": 100, + "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -2500,7 +3198,7 @@ "h": 6, "w": 12, "x": 12, - "y": 59 + "y": 81 }, "id": 146, "options": { @@ -2516,15 +3214,15 @@ "showUnfilled": true, "text": {} }, - "pluginVersion": "7.5.5", + "pluginVersion": "8.2.3", "targets": [ { "exemplar": true, - "expr": "(kubelet_volume_stats_used_bytes / on(persistentvolumeclaim) group_left(pod) (kubelet_volume_stats_capacity_bytes + on(persistentvolumeclaim) group_left(pod) kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~\"$app-.+$\",namespace=~\"$namespace\"}) )* 100", + "expr": "(kubelet_volume_stats_used_bytes / on(persistentvolumeclaim) group_left(pod) (kubelet_volume_stats_capacity_bytes + on(persistentvolumeclaim) group_left(pod) kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~\"$app-.+$\",namespace=\"$namespace\"}) )* 100", "instant": true, "interval": "", "legendFormat": {{ `"{{pod}}"` }}, - "refId": "A" + "refId": "A" } ], "title": "Persistent Volume Usage", @@ -2551,7 +3249,8 @@ "hideFrom": { "graph": false, "legend": false, - "tooltip": false + "tooltip": false, + "viz": false }, "lineInterpolation": "linear", "lineStyle": { @@ -2563,7 +3262,14 @@ "type": "linear" }, "showPoints": "never", - "spanNulls": true + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } }, "mappings": [], "thresholds": { @@ 
-2583,7 +3289,7 @@ "h": 8, "w": 24, "x": 0, - "y": 65 + "y": 87 }, "id": 150, "options": { @@ -2596,7 +3302,7 @@ "displayMode": "table", "placement": "right" }, - "tooltipOptions": { + "tooltip": { "mode": "single" } }, @@ -2604,11 +3310,11 @@ "targets": [ { "exemplar": true, - "expr": "(kubelet_volume_stats_used_bytes + on(persistentvolumeclaim) group_left(pod) kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~\"$app-.+$\",namespace=~\"$namespace\"}) ", + "expr": "(kubelet_volume_stats_used_bytes + on(persistentvolumeclaim) group_left(pod) kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~\"$app-.+$\",namespace=\"$namespace\"}) ", "interval": "", "intervalFactor": 1, "legendFormat": {{ `"{{pod}}"` }}, - "refId": "A" + "refId": "A" } ], "title": "Persistent Volume Usage History", @@ -2617,11 +3323,15 @@ { "collapsed": false, "datasource": "${datasource}", + "fieldConfig": { + "defaults": {}, + "overrides": [] + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 73 + "y": 95 }, "id": 144, "panels": [], @@ -2634,17 +3344,13 @@ "dashLength": 10, "dashes": false, "datasource": "${datasource}", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 7, "w": 12, "x": 0, - "y": 74 + "y": 96 }, "hiddenSeries": false, "id": 132, @@ -2665,7 +3371,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.5", + "pluginVersion": "8.2.3", "pointradius": 2, "points": false, "renderer": "flot", @@ -2676,14 +3382,14 @@ "targets": [ { "exemplar": true, - "expr": "sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\", pod=~\"$app-.+$\"}[$__rate_interval])) by (pod)", + "expr": "sum(irate(container_network_receive_bytes_total{namespace=\"$namespace\", pod=~\"$app-.+$\"}[$__rate_interval])) by (pod)", "format": "time_series", "interval": "", "intervalFactor": 2, "legendFormat": {{ `"{{pod}}"` }}, - "legendLink": null, - "refId": "A", - "step": 10 + "legendLink": null, + "refId": "A", + 
"step": 10 } ], "thresholds": [], @@ -2745,7 +3451,7 @@ "h": 7, "w": 12, "x": 12, - "y": 74 + "y": 96 }, "hiddenSeries": false, "id": 134, @@ -2766,7 +3472,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.5", + "pluginVersion": "8.2.3", "pointradius": 2, "points": false, "renderer": "flot", @@ -2777,14 +3483,14 @@ "targets": [ { "exemplar": true, - "expr": "sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\", pod=~\"$app-.+$\"}[$__rate_interval])) by (pod)", + "expr": "sum(irate(container_network_transmit_bytes_total{namespace=\"$namespace\", pod=~\"$app-.+$\"}[$__rate_interval])) by (pod)", "format": "time_series", "interval": "", "intervalFactor": 2, "legendFormat": {{ `"{{pod}}"` }}, - "legendLink": null, - "refId": "A", - "step": 10 + "legendLink": null, + "refId": "A", + "step": 10 } ], "thresholds": [], diff --git a/charts/kubedb-grafana-dashboards/dashboards/rabbitmq/rabbitmq-summary.json b/charts/kubedb-grafana-dashboards/dashboards/rabbitmq/rabbitmq-summary.json index ef2c7909f..6b1204c63 100644 --- a/charts/kubedb-grafana-dashboards/dashboards/rabbitmq/rabbitmq-summary.json +++ b/charts/kubedb-grafana-dashboards/dashboards/rabbitmq/rabbitmq-summary.json @@ -732,6 +732,397 @@ "title": "Storage Request", "type": "stat" }, + {{- if $alerts }} + { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 0 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "1h", + "now" + ] + }, + "reducer": { + "params": [], + "type": "last" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "10s", + "frequency": "10s", + "handler": 1, + "name": "RabbitMQ Down alert", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 9, + "x": 0, + "y": 7 + }, + 
"hiddenSeries": false, + "id": 158, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.3", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "kubedb_com_rabbitmq_status_phase{app=\"$app\",namespace=\"$namespace\",phase=\"NotReady\"}", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0, + "visible": true + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "RabbitMQ Down", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:88", + "format": "short", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:89", + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "alert": { + "alertRuleTags": { + "kdb": "" + }, + "conditions": [ + { + "evaluator": { + "params": [ + 0 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "1h", + "now" + ] + }, + "reducer": { + "params": [], + "type": "last" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "10s", + "frequency": "10s", + "handler": 1, + "name": "RabbitMQPhaseCritical", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 
8, + "w": 8, + "x": 9, + "y": 7 + }, + "hiddenSeries": false, + "id": 156, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.3", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "kubedb_com_rabbitmq_status_phase{app=\"$app\",namespace=\"$namespace\",phase=\"Critical\"}", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0, + "visible": true + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "RabbitMQ Critical Phase", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:70", + "format": "short", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:71", + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 0 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "1h", + "now" + ] + }, + "reducer": { + "params": [], + "type": "last" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "10s", + "frequency": "10s", + "handler": 1, + "name": "RabbitMQ Down alert", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + 
"fillGradient": 0, + "gridPos": { + "h": 8, + "w": 7, + "x": 17, + "y": 7 + }, + "hiddenSeries": false, + "id": 160, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.3", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "kubedb_com_rabbitmq_status_phase{app=\"$app\",namespace=\"$namespace\",phase=\"NotReady\"}", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0, + "visible": true + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "RabbitMQ Down", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:88", + "format": "short", + "logBase": 1, + "show": true + }, + { + "$$hashKey": "object:89", + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + {{- end }} { "collapsed": false, "datasource": "${datasource}", @@ -841,6 +1232,152 @@ "alignLevel": null } }, + {{- if $alerts }} + { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 0.8 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "1h", + "now" + ] + }, + "reducer": { + "params": [], + "type": "last" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "10s", + "frequency": "30s", + "handler": 1, + "name": "CPU Usage Percentage Alert", + 
"noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "CPU Usage Percentage by RabbitMQ pods", + "fieldConfig": { + "defaults": { + "unit": "percentunit" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 23 + }, + "hiddenSeries": false, + "id": 154, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.3", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(sum by (namespace, pod, container) (irate(container_cpu_usage_seconds_total{image!=\"\",job=\"kubelet\",metrics_path=\"/metrics/cadvisor\",namespace=\"$namespace\", pod=~\"$app-.+$\"}[5m])) * on (namespace, pod) group_left (node) topk by (namespace, pod) (1, max by (namespace, pod, node) (kube_pod_info{node!=\"\",namespace=\"$namespace\", pod=~\"$app-.+$\"}))) by (pod) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\",resource=\"cpu\",namespace=\"$namespace\", pod=~\"$app-.+$\"} * on (namespace,pod) group_left () max by (namespace,pod) ((kube_pod_status_phase{phase=~\"Pending|Running\"} == 1))) by (pod)", + "interval": "", + "intervalFactor": 1, + "legendFormat": {{ `"{{pod}}"` }}, + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0.8, + "visible": true + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU Usage Percentage", + "tooltip": { + "shared": true, + "sort": 0, + 
"value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + {{- end }} { "datasource": "${datasource}", "description": "CPU Quote information in details", @@ -1244,6 +1781,143 @@ "alignLevel": null } }, + {{- if $alerts }} + { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 0.8 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "last" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "5m", + "frequency": "1m", + "handler": 1, + "name": "Memory Usage Percentage", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 43 + }, + "hiddenSeries": false, + "id": 162, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.2.3", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(container_memory_working_set_bytes{image!=\"\",job=\"kubelet\",metrics_path=\"/metrics/cadvisor\",namespace=\"$namespace\", 
pod=~\"$app-.+$\",container!=\"\"} * on (namespace, pod) group_left (node) topk by (namespace, pod) (1, max by (namespace, pod, node) (kube_pod_info{node!=\"\"}))) by (pod) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\",resource=\"memory\",namespace=\"$namespace\", pod=~\"$app-.+$\"} * on (namespace,pod) group_left () max by (namespace,pod) ((kube_pod_status_phase{phase=~\"Pending|Running\"} == 1))) by (pod)", + "interval": "", + "legendFormat": {{ `"{{pod}}"` }}, + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0.8, + "visible": true + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory Usage Percentage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + {{- end }} { "datasource": "${datasource}", "fieldConfig": {