diff --git a/charts/kubedb-grafana-dashboards/dashboards/postgres/postgres_databases_dashboard.json b/charts/kubedb-grafana-dashboards/dashboards/postgres/postgres_databases_dashboard.json index 848288bd7..885380267 100644 --- a/charts/kubedb-grafana-dashboards/dashboards/postgres/postgres_databases_dashboard.json +++ b/charts/kubedb-grafana-dashboards/dashboards/postgres/postgres_databases_dashboard.json @@ -20,7 +20,7 @@ ] }, "description": "This dashboard works with postgres_exporter for prometheus", - "editable": false, + "editable": true, "gnetId": 9628, "graphTooltip": 0, "id": 7, diff --git a/charts/kubedb-grafana-dashboards/dashboards/postgres/postgres_pods_dashboard.json b/charts/kubedb-grafana-dashboards/dashboards/postgres/postgres_pods_dashboard.json index 9f8578cd1..47e91513b 100644 --- a/charts/kubedb-grafana-dashboards/dashboards/postgres/postgres_pods_dashboard.json +++ b/charts/kubedb-grafana-dashboards/dashboards/postgres/postgres_pods_dashboard.json @@ -20,7 +20,7 @@ ] }, "description": "This dashboard works with postgres_exporter for prometheus", - "editable": false, + "editable": true, "gnetId": 9628, "graphTooltip": 0, "id": 20, diff --git a/charts/kubedb-grafana-dashboards/dashboards/postgres/postgres_summary_dashboard.json b/charts/kubedb-grafana-dashboards/dashboards/postgres/postgres_summary_dashboard.json index f0709c279..925580f2a 100644 --- a/charts/kubedb-grafana-dashboards/dashboards/postgres/postgres_summary_dashboard.json +++ b/charts/kubedb-grafana-dashboards/dashboards/postgres/postgres_summary_dashboard.json @@ -1,4 +1,5 @@ {{- $shared := and (eq .Values.app.name "") (eq .Values.app.namespace "") -}} +{{- $alerts := (eq $.Values.dashboard.alerts true) -}} { "annotations": { "list": [ @@ -23,13 +24,13 @@ "editable": true, "gnetId": 9628, "graphTooltip": 0, - "id": 34, - "iteration": 1682833714433, + "id": null, + "iteration": 1705573409553, "links": [], "panels": [ { "collapsed": false, - "datasource": "${datasource}", + 
"datasource": null, "gridPos": { "h": 1, "w": 24, @@ -43,7 +44,7 @@ }, { "cacheTimeout": null, - "datasource": "${datasource}", + "datasource": null, "description": "Postgresql Version", "fieldConfig": { "defaults": { @@ -91,7 +92,7 @@ "text": {}, "textMode": "value" }, - "pluginVersion": "7.5.5", + "pluginVersion": "{{ .Values.grafana.version }}", "targets": [ { "exemplar": false, @@ -108,7 +109,7 @@ "type": "stat" }, { - "datasource": "${datasource}", + "datasource": null, "description": "Database uptime", "fieldConfig": { "defaults": { @@ -152,7 +153,7 @@ "text": {}, "textMode": "auto" }, - "pluginVersion": "7.5.5", + "pluginVersion": "{{ .Values.grafana.version }}", "targets": [ { "exemplar": false, @@ -167,7 +168,7 @@ "type": "stat" }, { - "datasource": "${datasource}", + "datasource": null, "description": "Total replica count of the Postgres database", "fieldConfig": { "defaults": { @@ -214,7 +215,7 @@ "text": {}, "textMode": "auto" }, - "pluginVersion": "7.5.5", + "pluginVersion": "{{ .Values.grafana.version }}", "targets": [ { "exemplar": false, @@ -229,7 +230,7 @@ "type": "stat" }, { - "datasource": "${datasource}", + "datasource": null, "description": "Represent database status", "fieldConfig": { "defaults": { @@ -301,7 +302,7 @@ "text": {}, "textMode": "auto" }, - "pluginVersion": "7.5.5", + "pluginVersion": "{{ .Values.grafana.version }}", "targets": [ { "exemplar": false, @@ -317,7 +318,7 @@ "type": "stat" }, { - "datasource": "${datasource}", + "datasource": null, "description": "KubeDB Postgres Current SSL Mode", "fieldConfig": { "defaults": { @@ -369,7 +370,7 @@ "text": {}, "textMode": "value" }, - "pluginVersion": "7.5.5", + "pluginVersion": "{{ .Values.grafana.version }}", "targets": [ { "exemplar": false, @@ -385,7 +386,7 @@ "type": "stat" }, { - "datasource": "${datasource}", + "datasource": null, "description": "KubeDB Postgres Resource terminationPolicy", "fieldConfig": { "defaults": { @@ -449,7 +450,7 @@ "text": {}, "textMode": "value" }, - 
"pluginVersion": "7.5.5", + "pluginVersion": "{{ .Values.grafana.version }}", "targets": [ { "exemplar": false, @@ -465,7 +466,7 @@ "type": "stat" }, { - "datasource": "${datasource}", + "datasource": null, "description": "Initial Requested CPU amount by Postgres Instance", "fieldConfig": { "defaults": { @@ -509,7 +510,7 @@ "text": {}, "textMode": "auto" }, - "pluginVersion": "7.5.5", + "pluginVersion": "{{ .Values.grafana.version }}", "targets": [ { "exemplar": false, @@ -524,7 +525,7 @@ "type": "stat" }, { - "datasource": "${datasource}", + "datasource": null, "description": "CPU Limit in core by Postgres instance", "fieldConfig": { "defaults": { @@ -567,7 +568,7 @@ "text": {}, "textMode": "auto" }, - "pluginVersion": "7.5.5", + "pluginVersion": "{{ .Values.grafana.version }}", "targets": [ { "exemplar": false, @@ -582,7 +583,7 @@ "type": "stat" }, { - "datasource": "${datasource}", + "datasource": null, "description": "Initial Requested Memory amount by Postgres Instance", "fieldConfig": { "defaults": { @@ -627,7 +628,7 @@ "text": {}, "textMode": "auto" }, - "pluginVersion": "7.5.5", + "pluginVersion": "{{ .Values.grafana.version }}", "targets": [ { "exemplar": false, @@ -642,7 +643,7 @@ "type": "stat" }, { - "datasource": "${datasource}", + "datasource": null, "description": "KubeDB Postgres Total Memory Limit", "fieldConfig": { "defaults": { @@ -686,7 +687,7 @@ "text": {}, "textMode": "auto" }, - "pluginVersion": "7.5.5", + "pluginVersion": "{{ .Values.grafana.version }}", "targets": [ { "exemplar": false, @@ -701,7 +702,7 @@ "type": "stat" }, { - "datasource": "${datasource}", + "datasource": null, "description": "Intial Storage Request by Postgres Instance when deploying", "fieldConfig": { "defaults": { @@ -746,7 +747,7 @@ "text": {}, "textMode": "auto" }, - "pluginVersion": "7.5.5", + "pluginVersion": "{{ .Values.grafana.version }}", "targets": [ { "exemplar": false, @@ -761,7 +762,7 @@ "type": "stat" }, { - "datasource": "${datasource}", + "datasource": 
null, "description": "Maximum number of concurrent connections of the Postgres instance", "fieldConfig": { "defaults": { @@ -804,7 +805,7 @@ "text": {}, "textMode": "auto" }, - "pluginVersion": "7.5.5", + "pluginVersion": "{{ .Values.grafana.version }}", "targets": [ { "exemplar": false, @@ -819,27 +820,58 @@ "title": "Max Connections", "type": "stat" }, + {{- if $alerts }} { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 100 + ], + "type": "lt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "1h", + "now" + ] + }, + "reducer": { + "params": [], + "type": "last" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "10s", + "frequency": "30s", + "handler": 1, + "name": "Postgresql Restarted alert", + "noDataState": "no_data", + "notifications": [] + }, "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, - "datasource": "${datasource}", - "description": "The current active line shows the current primary pod", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, + "datasource": null, "fill": 1, "fillGradient": 0, "gridPos": { - "h": 9, - "w": 12, + "h": 8, + "w": 8, "x": 0, "y": 7 }, "hiddenSeries": false, - "id": 102, + "id": 163, "legend": { "avg": false, "current": false, @@ -853,21 +885,10 @@ "linewidth": 1, "nullPointMode": "null", "options": { - "alertThreshold": true, - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - }, - "tooltipOptions": { - "mode": "single" - } + "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.5", + "pluginVersion": "{{ .Values.grafana.version }}", "pointradius": 2, "points": false, "renderer": "flot", @@ -877,19 +898,27 @@ "steppedLine": false, "targets": [ { - "exemplar": false, - "expr": "AVG(rate(pg_stat_replication_reply_time{namespace=\"$namespace\", pod=~\"$app-\\\\d+$\"}[5m])) by (pod)", - "instant": false, + "exemplar": 
true, + "expr": "time() - pg_postmaster_start_time_seconds{job=\"$app-stats\",namespace=\"$namespace\"}", "interval": "", "legendFormat": {{ `"{{pod}}"` }}, "refId": "A" } ], - "thresholds": [], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "lt", + "value": 100, + "visible": true + } + ], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Primary Pod FailOver Indicator", + "title": "Postgresql Restarted", "tooltip": { "shared": true, "sort": 0, @@ -927,26 +956,56 @@ } }, { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 1 + ], + "type": "lt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "1h", + "now" + ] + }, + "reducer": { + "params": [], + "type": "last" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "10s", + "frequency": "30s", + "handler": 1, + "name": "Postgresql Down alert", + "noDataState": "no_data", + "notifications": [] + }, "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, - "datasource": "${datasource}", - "description": "Postgres connections information in real time", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, + "datasource": null, "fill": 1, "fillGradient": 0, "gridPos": { - "h": 9, - "w": 12, - "x": 12, + "h": 8, + "w": 8, + "x": 8, "y": 7 }, "hiddenSeries": false, - "id": 120, + "id": 157, "legend": { "avg": false, "current": false, @@ -960,21 +1019,10 @@ "linewidth": 1, "nullPointMode": "null", "options": { - "alertThreshold": true, - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - }, - "tooltipOptions": { - "mode": "single" - } + "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.5", + "pluginVersion": "{{ .Values.grafana.version }}", "pointradius": 2, "points": false, "renderer": "flot", @@ -984,19 +1032,27 @@ "steppedLine": false, "targets": [ { - "exemplar": 
false, - "expr": "sum(pg_stat_activity_count{namespace=\"$namespace\",pod=~\"$app-\\\\d+$\"}) by (state)", - "instant": false, + "exemplar": true, + "expr": "pg_up{job=\"$app-stats\",namespace=\"$namespace\"}", "interval": "", - "legendFormat": {{ `"{{state}}"` }}, + "legendFormat": {{ `"{{pod}}"` }}, "refId": "A" } ], - "thresholds": [], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "lt", + "value": 1, + "visible": true + } + ], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Connnections", + "title": "Postgresql Down", "tooltip": { "shared": true, "sort": 0, @@ -1034,120 +1090,56 @@ } }, { - "datasource": "${datasource}", - "description": "Track the current Primary for Raft", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "graph": false, - "legend": false, - "tooltip": false + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 0 + ], + "type": "gt" }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" + "operator": { + "type": "and" }, - "showPoints": "never", - "spanNulls": true - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 16 - }, - "id": 147, - "options": { - "graph": {}, - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltipOptions": { - "mode": "single" - } - }, - "pluginVersion": "7.5.11", - "targets": [ - { - "exemplar": true, - "expr": "pg_coordinator_raft_primary{namespace=\"$namespace\", pod=~\"$app-.\"}", - "interval": "", - 
"legendFormat": "", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Raft Primary Tracker", - "type": "timeseries" - }, - { - "collapsed": false, - "datasource": "${datasource}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 24 + "query": { + "params": [ + "A", + "1h", + "now" + ] + }, + "reducer": { + "params": [], + "type": "last" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "10s", + "frequency": "30s", + "handler": 1, + "name": "Postgresql Phase Not Ready alert", + "noDataState": "no_data", + "notifications": [] }, - "id": 138, - "panels": [], - "title": "CPU Info", - "type": "row" - }, - { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, - "datasource": "${datasource}", - "description": "CPU Usage by Postgres pods", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, + "datasource": null, "fill": 1, "fillGradient": 0, "gridPos": { - "h": 10, - "w": 24, - "x": 0, - "y": 25 + "h": 8, + "w": 8, + "x": 16, + "y": 7 }, "hiddenSeries": false, - "id": 98, + "id": 171, "legend": { "avg": false, "current": false, @@ -1161,21 +1153,10 @@ "linewidth": 1, "nullPointMode": "null", "options": { - "alertThreshold": true, - "legend": { - "calcs": [], - "displayMode": "table", - "placement": "right" - }, - "tooltip": { - "mode": "single" - }, - "tooltipOptions": { - "mode": "single" - } + "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.5", + "pluginVersion": "8.0.7", "pointradius": 2, "points": false, "renderer": "flot", @@ -1185,18 +1166,27 @@ "steppedLine": false, "targets": [ { - "exemplar": false, - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=\"$namespace\", pod=~\"$app-\\\\d+$\"}) by (pod)", + "exemplar": true, + "expr": "kubedb_com_postgres_status_phase{app=\"$app\",namespace=\"$namespace\",phase=\"NotReady\"}", "interval": "", - "legendFormat": {{ `"{{pod}}"` }}, + "legendFormat": {{ `"{{postgres}}"` 
}}, "refId": "A" } ], - "thresholds": [], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0, + "visible": true + } + ], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "CPU Usage", + "title": "Postgresql Phase Not Ready", "tooltip": { "shared": true, "sort": 0, @@ -1234,17 +1224,976 @@ } }, { - "datasource": "${datasource}", - "description": "CPU Quote information in details", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "align": null, - "displayMode": "auto", - "filterable": false + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 0 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "1h", + "now" + ] + }, + "reducer": { + "params": [], + "type": "last" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "10s", + "frequency": "30s", + "handler": 1, + "name": "Postgresql Critical Phase alert", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 15 + }, + "hiddenSeries": false, + "id": 173, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.0.7", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "kubedb_com_postgres_status_phase{app=\"$app\",namespace=\"$namespace\",phase=\"Critical\"}", + "interval": "", + "legendFormat": {{ `"{{postgres}}"` }}, + "refId": "A" + } + ], 
+ "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0, + "visible": true + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Postgresql Critical Phase", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 0 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "1h", + "now" + ] + }, + "reducer": { + "params": [], + "type": "last" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "10s", + "frequency": "30s", + "handler": 1, + "name": "Postgres Ops Request Failed alert", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 15 + }, + "hiddenSeries": false, + "id": 177, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "{{ .Values.grafana.version }}", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": 
"ops_kubedb_com_postgresopsrequest_status_phase{app=\"$app\",namespace=\"$namespace\",phase=\"Failed\"}", + "interval": "", + "legendFormat": {{ `"{{postgres}}"` }}, + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0, + "visible": true + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Postgres Ops Request Failed", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 0 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "1h", + "now" + ] + }, + "reducer": { + "params": [], + "type": "last" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "30m", + "frequency": "30s", + "handler": 1, + "name": "Postgres Ops Request Progressing Too Long alert", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 15 + }, + "hiddenSeries": false, + "id": 175, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "{{ .Values.grafana.version }}", + "pointradius": 
2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "ops_kubedb_com_postgresopsrequest_status_phase{app=\"$app\",namespace=\"$namespace\",phase=\"Progressing\"}", + "interval": "", + "legendFormat": {{ `"{{postgres}}"` }}, + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0, + "visible": true + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Postgres Ops Request Progressing Too Long", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + {{- end }} + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "The current active line shows the current primary pod", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 23 + }, + "hiddenSeries": false, + "id": 102, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + }, + "tooltipOptions": { + "mode": "single" + } + }, + "percentage": false, + "pluginVersion": "{{ .Values.grafana.version }}", + "pointradius": 2, + "points": false, + "renderer": "flot", + 
"seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": false, + "expr": "AVG(rate(pg_stat_replication_reply_time{namespace=\"$namespace\", pod=~\"$app-\\\\d+$\"}[5m])) by (pod)", + "instant": false, + "interval": "", + "legendFormat": {{ `"{{pod}}"` }}, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Primary Pod FailOver Indicator", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Postgres connections information in real time", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 23 + }, + "hiddenSeries": false, + "id": 120, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + }, + "tooltipOptions": { + "mode": "single" + } + }, + "percentage": false, + "pluginVersion": "{{ .Values.grafana.version }}", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": false, + "expr": 
"sum(pg_stat_activity_count{namespace=\"$namespace\",pod=~\"$app-\\\\d+$\"}) by (state)", + "instant": false, + "interval": "", + "legendFormat": {{ `"{{state}}"` }}, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Connnections", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "datasource": null, + "description": "Track the current Primary for Raft", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 32 + }, + "id": 147, + "options": { + "graph": {}, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "7.5.11", + "targets": [ + { + "exemplar": true, + "expr": 
"pg_coordinator_raft_primary{namespace=\"$namespace\", pod=~\"$app-.\"}", + "interval": "", + "legendFormat": {{ `"{{pod}}"` }}, + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Raft Primary Tracker", + "type": "timeseries" + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 40 + }, + "id": 138, + "panels": [], + "title": "CPU Info", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "CPU Usage by Postgres pods", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 41 + }, + "hiddenSeries": false, + "id": 98, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true, + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right" + }, + "tooltip": { + "mode": "single" + }, + "tooltipOptions": { + "mode": "single" + } + }, + "percentage": false, + "pluginVersion": "{{ .Values.grafana.version }}", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": false, + "expr": "sum(sum by (namespace, pod, container) (irate(container_cpu_usage_seconds_total{image!=\"\",job=\"kubelet\",metrics_path=\"/metrics/cadvisor\",namespace=\"$namespace\", pod=~\"$app-\\\\d+$\"}[5m])) * on (namespace, pod) group_left (node) topk by (namespace, pod) (1, max by (namespace, pod, node) (kube_pod_info{node!=\"\",namespace=\"$namespace\", pod=~\"$app-\\\\d+$\"}))) by (pod)", + "interval": "", + "legendFormat": {{ `"{{pod}}"` }}, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + 
"shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + {{- if $alerts }} + { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 0.7 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "1h", + "now" + ] + }, + "reducer": { + "params": [], + "type": "last" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "10s", + "frequency": "30s", + "handler": 1, + "name": "CPU Usage Percentage alert", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fieldConfig": { + "defaults": { + "unit": "percentunit" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 51 + }, + "hiddenSeries": false, + "id": 179, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "{{ .Values.grafana.version }}", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(sum by (namespace, pod, container) 
(irate(container_cpu_usage_seconds_total{image!=\"\",job=\"kubelet\",metrics_path=\"/metrics/cadvisor\",namespace=\"$namespace\", pod=~\"$app-\\\\d+$|$app-arbiter\\\\d+-\\\\d+$\"}[5m])) * on (namespace, pod) group_left (node) topk by (namespace, pod) (1, max by (namespace, pod, node) (kube_pod_info{node!=\"\",namespace=\"$namespace\", pod=~\"$app-\\\\d+$|$app-arbiter\\\\d+-\\\\d+$\"}))) by (pod) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\",resource=\"cpu\",namespace=\"$namespace\", pod=~\"$app-\\\\d+$|$app-arbiter\\\\d+-\\\\d+$\"} * on (namespace,pod) group_left () max by (namespace,pod) ((kube_pod_status_phase{phase=~\"Pending|Running\"} == 1))) by (pod)", + "interval": "", + "legendFormat": {{ `"{{pod}}"` }}, + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0.7, + "visible": true + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU Usage Percentage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:82", + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:83", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + {{- end }} + { + "datasource": null, + "description": "CPU Quote information in details", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "center", + "displayMode": "auto", + "filterable": false }, "decimals": 2, "displayName": "", @@ -1277,7 +2226,7 @@ }, { "id": "custom.align", - "value": null + "value": "center" } ] }, @@ -1301,7 +2250,7 @@ }, { "id": "custom.align", - 
"value": null + "value": "center" } ] }, @@ -1325,7 +2274,7 @@ }, { "id": "custom.align", - "value": null + "value": "center" } ] }, @@ -1349,7 +2298,7 @@ }, { "id": "custom.align", - "value": null + "value": "center" } ] }, @@ -1373,7 +2322,7 @@ }, { "id": "custom.align", - "value": null + "value": "center" } ] }, @@ -1397,7 +2346,7 @@ }, { "id": "custom.align", - "value": null + "value": "center" } ] }, @@ -1431,7 +2380,7 @@ }, { "id": "custom.align", - "value": null + "value": "center" } ] } @@ -1441,18 +2390,18 @@ "h": 7, "w": 24, "x": 0, - "y": 35 + "y": 61 }, "id": 122, "links": [], "options": { "showHeader": true }, - "pluginVersion": "7.5.5", + "pluginVersion": "{{ .Values.grafana.version }}", "targets": [ { "exemplar": false, - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=\"$namespace\", pod=~\"$app-\\\\d+$\"}) by (pod)", + "expr": "sum(sum by (namespace, pod, container) (irate(container_cpu_usage_seconds_total{image!=\"\",job=\"kubelet\",metrics_path=\"/metrics/cadvisor\",namespace=\"$namespace\", pod=~\"$app-\\\\d+$\"}[5m])) * on (namespace, pod) group_left (node) topk by (namespace, pod) (1, max by (namespace, pod, node) (kube_pod_info{node!=\"\",namespace=\"$namespace\", pod=~\"$app-\\\\d+$\"}))) by (pod)", "format": "table", "instant": true, "interval": "", @@ -1463,7 +2412,7 @@ }, { "exemplar": false, - "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{namespace=\"$namespace\", pod=~\"$app-\\\\d+$\"}) by (pod)", + "expr": "sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\",resource=\"cpu\",namespace=\"$namespace\", pod=~\"$app-\\\\d+$\"} * on (namespace,pod) group_left () max by (namespace,pod) ((kube_pod_status_phase{phase=~\"Pending|Running\"} == 1))) by (pod)", "format": "table", "instant": true, "interval": "", @@ -1474,7 +2423,7 @@ }, { "exemplar": false, - "expr": 
"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=\"$namespace\", pod=~\"$app-\\\\d+$\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{namespace=\"$namespace\", pod=~\"$app-\\\\d+$\"}) by (pod)", + "expr": "sum(sum by (namespace, pod, container) (irate(container_cpu_usage_seconds_total{image!=\"\",job=\"kubelet\",metrics_path=\"/metrics/cadvisor\",namespace=\"$namespace\", pod=~\"$app-\\\\d+$\"}[5m])) * on (namespace, pod) group_left (node) topk by (namespace, pod) (1, max by (namespace, pod, node) (kube_pod_info{node!=\"\",namespace=\"$namespace\", pod=~\"$app-\\\\d+$\"}))) by (pod) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\",resource=\"cpu\",namespace=\"$namespace\", pod=~\"$app-\\\\d+$\"} * on (namespace,pod) group_left () max by (namespace,pod) ((kube_pod_status_phase{phase=~\"Pending|Running\"} == 1))) by (pod)", "format": "table", "instant": true, "interval": "", @@ -1485,7 +2434,7 @@ }, { "exemplar": false, - "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{namespace=\"$namespace\", pod=~\"$app-\\\\d+$\"}) by (pod)", + "expr": "sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\",resource=\"cpu\",namespace=\"$namespace\", pod=~\"$app-\\\\d+$\"} * on (namespace,pod) group_left() max by (namespace, pod) ((kube_pod_status_phase{phase=~\"Pending|Running\"} == 1))) by (pod)", "format": "table", "instant": true, "interval": "", @@ -1496,7 +2445,7 @@ }, { "exemplar": false, - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=\"$namespace\", pod=~\"$app-\\\\d+$\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{namespace=\"$namespace\", pod=~\"$app-\\\\d+$\"}) by (pod)", + "expr": "sum(sum by (namespace, pod, container) 
(irate(container_cpu_usage_seconds_total{image!=\"\",job=\"kubelet\",metrics_path=\"/metrics/cadvisor\",namespace=\"$namespace\", pod=~\"$app-\\\\d+$\"}[5m])) * on (namespace, pod) group_left (node) topk by (namespace, pod) (1, max by (namespace, pod, node) (kube_pod_info{node!=\"\",namespace=\"$namespace\", pod=~\"$app-\\\\d+$\"}))) by (pod) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\",resource=\"cpu\",namespace=\"$namespace\", pod=~\"$app-\\\\d+$\"} * on (namespace,pod) group_left() max by (namespace, pod) ((kube_pod_status_phase{phase=~\"Pending|Running\"} == 1))) by (pod)", "format": "table", "instant": true, "interval": "", @@ -1521,12 +2470,12 @@ }, { "collapsed": false, - "datasource": "${datasource}", + "datasource": null, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 42 + "y": 68 }, "id": 140, "panels": [], @@ -1538,7 +2487,7 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "${datasource}", + "datasource": null, "description": "Memory Usage by Postgres pods", "fieldConfig": { "defaults": { @@ -1552,7 +2501,7 @@ "h": 10, "w": 24, "x": 0, - "y": 43 + "y": 69 }, "hiddenSeries": false, "id": 100, @@ -1583,7 +2532,7 @@ } }, "percentage": false, - "pluginVersion": "7.5.5", + "pluginVersion": "{{ .Values.grafana.version }}", "pointradius": 2, "points": false, "renderer": "flot", @@ -1594,7 +2543,7 @@ "targets": [ { "exemplar": false, - "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{namespace=\"$namespace\", pod=~\"$app-\\\\d+$\",container!=\"\"}) by (pod)", + "expr": "sum(container_memory_working_set_bytes{image!=\"\",job=\"kubelet\",metrics_path=\"/metrics/cadvisor\",namespace=\"$namespace\", pod=~\"$app-\\\\d+$\",container!=\"\"} * on (namespace, pod) group_left (node) topk by (namespace, pod) (1, max by (namespace, pod, node) (kube_pod_info{node!=\"\"}))) by (pod)", "interval": "", "legendFormat": {{ `"{{pod}}"` }}, "refId": "A" @@ -1642,7 +2591,7 @@ } }, { - "datasource": "${datasource}", 
+ "datasource": null, "fieldConfig": { "defaults": { "color": { @@ -1920,154 +2869,524 @@ "h": 7, "w": 24, "x": 0, - "y": 53 + "y": 79 }, "id": 124, "links": [], "options": { "showHeader": true }, - "pluginVersion": "7.5.5", + "pluginVersion": "{{ .Values.grafana.version }}", + "targets": [ + { + "exemplar": false, + "expr": "sum(container_memory_working_set_bytes{namespace=\"$namespace\",pod=~\"$app-\\\\d+$\",container!=\"\", image!=\"\"}) by (pod)", + "format": "table", + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 10 + }, + { + "exemplar": false, + "expr": "sum (kube_pod_container_resource_requests{job=\"kube-state-metrics\",resource=\"memory\",namespace=\"$namespace\",pod=~\"$app-\\\\d+$\"} * on (namespace, pod, cluster) group_left () max by (namespace, pod, cluster) ((kube_pod_status_phase{phase=~\"Pending|Running\"} == 1))) by (pod)", + "format": "table", + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + }, + { + "exemplar": false, + "expr": "sum(container_memory_working_set_bytes{namespace=\"$namespace\",pod=~\"$app-\\\\d+$\",container!=\"\", image!=\"\"}) by (pod) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\",resource=\"memory\",namespace=\"$namespace\",pod=~\"$app-\\\\d+$\"} * on (namespace, pod, cluster) group_left () max by (namespace, pod, cluster) ((kube_pod_status_phase{phase=~\"Pending|Running\"} == 1))) by (pod)", + "format": "table", + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 + }, + { + "exemplar": false, + "expr": "sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\",resource=\"memory\",namespace=\"$namespace\",pod=~\"$app-\\\\d+$\"} * on (namespace, pod, cluster) group_left () max by (namespace, pod, cluster) ((kube_pod_status_phase{phase=~\"Pending|Running\"} == 1))) by (pod)", + "format": "table", + "instant": true, + 
"interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 + }, + { + "exemplar": false, + "expr": "sum(container_memory_working_set_bytes{namespace=\"$namespace\",pod=~\"$app-\\\\d+$\",container!=\"\", image!=\"\"}) by (pod) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\",resource=\"memory\",namespace=\"$namespace\",pod=~\"$app-\\\\d+$\"} * on (namespace, pod, cluster) group_left () max by (namespace, pod, cluster) ((kube_pod_status_phase{phase=~\"Pending|Running\"} == 1))) by (pod)", + "format": "table", + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 + }, + { + "exemplar": false, + "expr": "sum(container_memory_rss{namespace=\"$namespace\",pod=~\"$app-\\\\d+$\",container!=\"\"}) by (pod)", + "format": "table", + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "F", + "step": 10 + }, + { + "exemplar": false, + "expr": "sum(container_memory_cache{namespace=\"$namespace\",pod=~\"$app-\\\\d+$\",container!=\"\"}) by (pod)", + "format": "table", + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "G", + "step": 10 + }, + { + "exemplar": false, + "expr": "sum(container_memory_swap{namespace=\"$namespace\",pod=~\"$app-\\\\d+$\",container!=\"\"}) by (pod)", + "format": "table", + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "H", + "step": 10 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory Quota", + "transformations": [ + { + "id": "merge", + "options": { + "reducers": [] + } + } + ], + "type": "table" + }, + {{- if $alerts }} + { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 0.8 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "1h", + "now" + ] + }, + "reducer": { + "params": [], + "type": "last" + }, + "type": "query" 
+ } + ], + "executionErrorState": "alerting", + "for": "10s", + "frequency": "30s", + "handler": 1, + "name": "Memory Usage Percentage alert", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fieldConfig": { + "defaults": { + "unit": "percentunit" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 86 + }, + "hiddenSeries": false, + "id": 181, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "{{ .Values.grafana.version }}", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "sum(container_memory_working_set_bytes{namespace=\"$namespace\",pod=~\"$app-\\\\d+$|$app-arbiter\\\\d+-\\\\d+$\",container!=\"\", image!=\"\"}) by (pod) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\",resource=\"memory\",namespace=\"$namespace\",pod=~\"$app-\\\\d+$|$app-arbiter\\\\d+-\\\\d+$\"} * on (namespace, pod, cluster) group_left () max by (namespace, pod, cluster) ((kube_pod_status_phase{phase=~\"Pending|Running\"} == 1))) by (pod)", + "interval": "", + "legendFormat": {{ `"{{pod}}"` }}, + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0.8, + "visible": true + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory Usage Percentage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + 
"mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + {{- end }} + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 94 + }, + "id": 142, + "panels": [], + "title": "Storage Info", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "Disk usage by Postgres pods", + "fieldConfig": { + "defaults": { + "unit": "bytes" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 95 + }, + "hiddenSeries": false, + "id": 112, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + }, + "tooltipOptions": { + "mode": "single" + } + }, + "percentage": false, + "pluginVersion": "{{ .Values.grafana.version }}", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, "targets": [ { "exemplar": false, - "expr": "sum(container_memory_working_set_bytes{namespace=\"$namespace\",pod=~\"$app-\\\\d+$\",container!=\"\", image!=\"\"}) by (pod)", - "format": "table", - "instant": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "refId": "A", - "step": 10 - }, - { - "exemplar": false, - "expr": 
"sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{namespace=\"$namespace\",pod=~\"$app-\\\\d+$\"}) by (pod)", - "format": "table", - "instant": true, + "expr": "avg(container_blkio_device_usage_total{namespace=\"$namespace\", pod=~\"$app-\\\\d+$\"}) by (pod)", "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "refId": "B", - "step": 10 - }, + "legendFormat": {{ `"{{pod}}"` }}, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Disk Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ { - "exemplar": false, - "expr": "sum(container_memory_working_set_bytes{namespace=\"$namespace\",pod=~\"$app-\\\\d+$\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{namespace=\"$namespace\",pod=~\"$app-\\\\d+$\"}) by (pod)", - "format": "table", - "instant": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "refId": "C", - "step": 10 + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true }, { - "exemplar": false, - "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{namespace=\"$namespace\",pod=~\"$app-\\\\d+$\"}) by (pod)", - "format": "table", - "instant": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "refId": "D", - "step": 10 + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "System Disk Usage Information", + "fieldConfig": { + "defaults": { + "unit": "none" }, - { - "exemplar": 
false, - "expr": "sum(container_memory_working_set_bytes{namespace=\"$namespace\",pod=~\"$app-\\\\d+$\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{namespace=\"$namespace\", pod=~\"$app-\\\\d+$\"}) by (pod)", - "format": "table", - "instant": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "refId": "E", - "step": 10 + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 95 + }, + "hiddenSeries": false, + "id": 108, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true, + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right" }, - { - "exemplar": false, - "expr": "sum(container_memory_rss{namespace=\"$namespace\",pod=~\"$app-\\\\d+$\",container!=\"\"}) by (pod)", - "format": "table", - "instant": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "refId": "F", - "step": 10 + "tooltip": { + "mode": "single" }, + "tooltipOptions": { + "mode": "single" + } + }, + "percentage": false, + "pluginVersion": "{{ .Values.grafana.version }}", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ { "exemplar": false, - "expr": "sum(container_memory_cache{namespace=\"$namespace\",pod=~\"$app-\\\\d+$\",container!=\"\"}) by (pod)", - "format": "table", - "instant": true, + "expr": "sum(rate(container_fs_writes_total{namespace=\"$namespace\", pod=~\"$app-\\\\d+$\"}[5m]))by(pod)", + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "refId": "G", - "step": 10 + "intervalFactor": 1, + "legendFormat": {{ `"{{pod}}-disk-write"` }}, + "refId": "A" }, { "exemplar": false, - 
"expr": "sum(container_memory_swap{namespace=\"$namespace\",pod=~\"$app-\\\\d+$\",container!=\"\"}) by (pod)", - "format": "table", - "instant": true, + "expr": "sum(rate(container_fs_reads_total{namespace=\"$namespace\", pod=~\"$app-\\\\d+$\"}[5m]))by(pod)", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "refId": "H", - "step": 10 + "legendFormat": {{ `"{{pod}}-disk-read"` }}, + "refId": "B" } ], + "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, - "title": "Memory Quota", - "transformations": [ + "title": "Disk R/W Info", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ { - "id": "merge", - "options": { - "reducers": [] - } + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true } ], - "type": "table" - }, - { - "collapsed": false, - "datasource": "${datasource}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 60 - }, - "id": 142, - "panels": [], - "title": "Storage Info", - "type": "row" + "yaxis": { + "align": false, + "alignLevel": null + } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, - "datasource": "${datasource}", - "description": "Disk usage by Postgres pods", + "datasource": null, "fieldConfig": { "defaults": { - "unit": "bytes" + "unit": "short" }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { - "h": 9, + "h": 7, "w": 12, "x": 0, - "y": 61 + "y": 104 }, "hiddenSeries": false, - "id": 112, + "id": 126, "legend": { "avg": false, "current": false, @@ -2079,6 +3398,7 @@ }, "lines": true, "linewidth": 1, + "links": [], "nullPointMode": "null", "options": { "alertThreshold": true, @@ -2095,7 +3415,7 @@ } }, "percentage": 
false, - "pluginVersion": "7.5.5", + "pluginVersion": "{{ .Values.grafana.version }}", "pointradius": 2, "points": false, "renderer": "flot", @@ -2106,17 +3426,21 @@ "targets": [ { "exemplar": false, - "expr": "avg(container_blkio_device_usage_total{namespace=\"$namespace\", pod=~\"$app-\\\\d+$\"}) by (pod)", + "expr": "ceil(sum by(pod) (rate(container_fs_reads_total{container!=\"\", namespace=~\"$namespace\", pod=~\"$app-\\\\d+$\"}[5m]) + rate(container_fs_writes_total{container!=\"\" ,namespace=~\"$namespace\", pod=~\"$app-\\\\d+$\"}[5m])))", + "format": "time_series", "interval": "", + "intervalFactor": 2, "legendFormat": {{ `"{{pod}}"` }}, - "refId": "A" + "legendLink": null, + "refId": "A", + "step": 10 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Disk Usage", + "title": "IOPS(Reads+Writes)", "tooltip": { "shared": true, "sort": 0, @@ -2132,7 +3456,7 @@ }, "yaxes": [ { - "format": "bytes", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -2158,24 +3482,23 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "${datasource}", - "description": "System Disk Usage Information", + "datasource": null, "fieldConfig": { "defaults": { - "unit": "none" + "unit": "Bps" }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { - "h": 9, + "h": 7, "w": 12, "x": 12, - "y": 61 + "y": 104 }, "hiddenSeries": false, - "id": 108, + "id": 128, "legend": { "avg": false, "current": false, @@ -2187,13 +3510,14 @@ }, "lines": true, "linewidth": 1, + "links": [], "nullPointMode": "null", "options": { "alertThreshold": true, "legend": { "calcs": [], - "displayMode": "table", - "placement": "right" + "displayMode": "list", + "placement": "bottom" }, "tooltip": { "mode": "single" @@ -2203,7 +3527,7 @@ } }, "percentage": false, - "pluginVersion": "7.5.5", + "pluginVersion": "{{ .Values.grafana.version }}", "pointradius": 2, "points": false, "renderer": "flot", @@ -2214,28 +3538,21 @@ "targets": [ { 
"exemplar": false, - "expr": "sum(rate(container_fs_writes_total{namespace=\"$namespace\", pod=~\"$app-\\\\d+$\"}[5m]))by(pod)", - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": {{ `"{{pod}}-disk-write"` }}, - "refId": "A" - }, - { - "exemplar": false, - "expr": "sum(rate(container_fs_reads_total{namespace=\"$namespace\", pod=~\"$app-\\\\d+$\"}[5m]))by(pod)", - "hide": false, - "instant": false, + "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", namespace=~\"$namespace\", pod=~\"$app-\\\\d+$\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\" ,namespace=~\"$namespace\", pod=~\"$app-\\\\d+$\"}[5m]))", + "format": "time_series", "interval": "", - "legendFormat": {{ `"{{pod}}-disk-read"` }}, - "refId": "B" + "intervalFactor": 2, + "legendFormat": {{ `"{{pod}}"` }}, + "legendLink": null, + "refId": "A", + "step": 10 } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Disk R/W Info", + "title": "ThroughPut(Read+Write)", "tooltip": { "shared": true, "sort": 0, @@ -2251,7 +3568,7 @@ }, "yaxes": [ { - "format": "none", + "format": "Bps", "label": null, "logBase": 1, "max": null, @@ -2273,818 +3590,1405 @@ } }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${datasource}", + "datasource": null, "fieldConfig": { "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": null, + "displayMode": "auto", + "filterable": false + }, + "decimals": 2, + "displayName": "", + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, "unit": "short" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time" + }, + "properties": [ + { + "id": "displayName", + "value": "Time" + }, + { + "id": "custom.align", + "value": null + } + ] + }, + { + "matcher": { + "id": "byName", + 
"options": "Value #A" + }, + "properties": [ + { + "id": "displayName", + "value": "IOPS(Reads)" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": -1 + }, + { + "id": "custom.align", + "value": null + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #B" + }, + "properties": [ + { + "id": "displayName", + "value": "IOPS(Writes)" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": -1 + }, + { + "id": "custom.align", + "value": null + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #C" + }, + "properties": [ + { + "id": "displayName", + "value": "IOPS(Reads + Writes)" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": -1 + }, + { + "id": "custom.align", + "value": null + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #D" + }, + "properties": [ + { + "id": "displayName", + "value": "Throughput(Read)" + }, + { + "id": "unit", + "value": "Bps" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align", + "value": null + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #E" + }, + "properties": [ + { + "id": "displayName", + "value": "Throughput(Write)" + }, + { + "id": "unit", + "value": "Bps" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align", + "value": null + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Value #F" + }, + "properties": [ + { + "id": "displayName", + "value": "Throughput(Read + Write)" + }, + { + "id": "unit", + "value": "Bps" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "custom.align", + "value": null + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "pod" + }, + "properties": [ + { + "id": "displayName", + "value": "Pod" + }, + { + "id": "unit", + "value": "short" + }, + { + "id": "decimals", + "value": 2 + }, + { + "id": "links", + "value": [ + { + "targetBlank": false, + "title": "Drill down to 
pods", + "url": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell" + } + ] + }, + { + "id": "custom.align", + "value": null + } + ] + } + ] }, - "fill": 1, - "fillGradient": 0, "gridPos": { "h": 7, - "w": 12, + "w": 24, "x": 0, - "y": 70 - }, - "hiddenSeries": false, - "id": 126, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false + "y": 111 }, - "lines": true, - "linewidth": 1, + "id": 130, "links": [], - "nullPointMode": "null", "options": { - "alertThreshold": true, - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - }, - "tooltipOptions": { - "mode": "single" - } + "showHeader": true }, - "percentage": false, - "pluginVersion": "7.5.5", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, + "pluginVersion": "{{ .Values.grafana.version }}", "targets": [ { "exemplar": false, - "expr": "ceil(sum by(pod) (rate(container_fs_reads_total{container!=\"\", namespace=~\"$namespace\", pod=~\"$app-\\\\d+$\"}[5m]) + rate(container_fs_writes_total{container!=\"\" ,namespace=~\"$namespace\", pod=~\"$app-\\\\d+$\"}[5m])))", - "format": "time_series", + "expr": "sum by(pod) (rate(container_fs_reads_total{container!=\"\", pod=~\"$app-\\\\d+$\",namespace=~\"$namespace\"}[5m]))", + "format": "table", + "instant": true, "interval": "", "intervalFactor": 2, - "legendFormat": {{ `"{{pod}}"` }}, - "legendLink": null, + "legendFormat": "", "refId": "A", "step": 10 - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "IOPS(Reads+Writes)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - 
"show": true, - "values": [] - }, - "yaxes": [ + }, { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "exemplar": false, + "expr": "sum by(pod) (rate(container_fs_writes_total{container!=\"\", pod=~\"$app-\\\\d+$\",namespace=~\"$namespace\"}[5m]))", + "format": "table", + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 }, { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "unit": "Bps" + "exemplar": false, + "expr": "sum by(pod) (rate(container_fs_reads_total{container!=\"\", pod=~\"$app-\\\\d+$\",namespace=~\"$namespace\"}[5m]) + rate(container_fs_writes_total{container!=\"\",pod=~\"$app-\\\\d+$\",namespace=~\"$namespace\"}[5m]))", + "format": "table", + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "C", + "step": 10 }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 70 - }, - "hiddenSeries": false, - "id": 128, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true, - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" + { + "exemplar": false, + "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", pod=~\"$app-\\\\d+$\",namespace=~\"$namespace\"}[5m]))", + "format": "table", + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 10 }, - "tooltip": { - "mode": "single" + { + "exemplar": 
false, + "expr": "sum by(pod) (rate(container_fs_writes_bytes_total{container!=\"\", pod=~\"$app-\\\\d+$\",namespace=~\"$namespace\"}[5m]))", + "format": "table", + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "E", + "step": 10 }, - "tooltipOptions": { - "mode": "single" - } - }, - "percentage": false, - "pluginVersion": "7.5.5", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ { "exemplar": false, - "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", namespace=~\"$namespace\", pod=~\"$app-\\\\d+$\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\" ,namespace=~\"$namespace\", pod=~\"$app-\\\\d+$\"}[5m]))", - "format": "time_series", + "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", pod=~\"$app-\\\\d+$\",namespace=~\"$namespace\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\",pod=~\"$app-\\\\d+$\",namespace=~\"$namespace\"}[5m]))", + "format": "table", + "instant": true, "interval": "", "intervalFactor": 2, - "legendFormat": {{ `"{{pod}}"` }}, - "legendLink": null, - "refId": "A", + "legendFormat": "", + "refId": "F", "step": 10 } ], - "thresholds": [], "timeFrom": null, - "timeRegions": [], "timeShift": null, - "title": "ThroughPut(Read+Write)", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, + "title": "Current Storage IO", + "transformations": [ { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true + "id": "merge", + "options": { + "reducers": [] + } } ], - "yaxis": { - "align": false, - "alignLevel": null - } + 
"type": "table" + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 118 + }, + "id": 149, + "panels": [], + "title": "Persistent Storage Insight", + "type": "row" }, { - "datasource": "${datasource}", + "datasource": null, + "description": "This panel describes allocated persistent storage in a table format\n", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "custom": { - "align": null, - "displayMode": "auto", + "align": "center", + "displayMode": "color-text", "filterable": false }, - "decimals": 2, - "displayName": "", "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", + "color": "light-blue", "value": null - }, - { - "color": "red", - "value": 80 } ] }, - "unit": "short" + "unit": "decbytes" }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Time" - }, - "properties": [ - { - "id": "displayName", - "value": "Time" - }, - { - "id": "custom.align", - "value": null - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Value #A" - }, - "properties": [ - { - "id": "displayName", - "value": "IOPS(Reads)" - }, - { - "id": "unit", - "value": "short" - }, - { - "id": "decimals", - "value": -1 - }, - { - "id": "custom.align", - "value": null - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Value #B" + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 119 + }, + "id": 153, + "options": { + "showHeader": true + }, + "pluginVersion": "{{ .Values.grafana.version }}", + "targets": [ + { + "exemplar": true, + "expr": "kubelet_volume_stats_capacity_bytes{} + on(persistentvolumeclaim) group_left(pod) kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~\"^$app.*\",namespace=~\"$namespace\"}", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Allocated Storage", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": 
{ + "Time": true, + "endpoint": true, + "instance": true, + "job": true, + "metrics_path": true, + "namespace": true, + "node": true, + "persistentvolumeclaim": true, + "service": true }, - "properties": [ - { - "id": "displayName", - "value": "IOPS(Writes)" - }, - { - "id": "unit", - "value": "short" - }, - { - "id": "decimals", - "value": -1 - }, - { - "id": "custom.align", - "value": null - } - ] + "indexByName": {}, + "renameByName": { + "Value": "Volume", + "pod": "Pod" + } + } + } + ], + "type": "table" + }, + { + "datasource": null, + "description": "This panel shows the persistent volume usage in percentage. ", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" }, - { - "matcher": { - "id": "byName", - "options": "Value #C" - }, - "properties": [ - { - "id": "displayName", - "value": "IOPS(Reads + Writes)" - }, - { - "id": "unit", - "value": "short" - }, - { - "id": "decimals", - "value": -1 - }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ { - "id": "custom.align", + "color": "green", "value": null - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Value #D" - }, - "properties": [ - { - "id": "displayName", - "value": "Throughput(Read)" - }, - { - "id": "unit", - "value": "Bps" }, { - "id": "decimals", - "value": 2 - }, - { - "id": "custom.align", - "value": null + "color": "red", + "value": 80 } ] }, - { - "matcher": { - "id": "byName", - "options": "Value #E" - }, - "properties": [ - { - "id": "displayName", - "value": "Throughput(Write)" - }, - { - "id": "unit", - "value": "Bps" - }, - { - "id": "decimals", - "value": 2 - }, - { - "id": "custom.align", - "value": null - } - ] + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 119 + }, + "id": 155, + "options": { + "displayMode": "gradient", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + 
"showUnfilled": true, + "text": {} + }, + "pluginVersion": "{{ .Values.grafana.version }}", + "targets": [ + { + "exemplar": true, + "expr": "(kubelet_volume_stats_used_bytes{} / on(persistentvolumeclaim) group_left(pod) (kubelet_volume_stats_capacity_bytes{} + on(persistentvolumeclaim) group_left(pod) kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~\"^$app.*\",namespace=~\"$namespace\"}) )* 100", + "instant": true, + "interval": "", + "legendFormat": {{ `"{{pod}}"` }}, + "refId": "A" + } + ], + "title": "Persistent Volume Usage", + "transformations": [], + "type": "bargauge" + }, + { + "datasource": null, + "description": "This panel describes the persistent volume usage in a time series. ", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" }, - { - "matcher": { - "id": "byName", - "options": "Value #F" + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMax": 5000, + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "opacity", + "hideFrom": { + "graph": false, + "legend": false, + "tooltip": false, + "viz": false }, - "properties": [ - { - "id": "displayName", - "value": "Throughput(Read + Write)" - }, - { - "id": "unit", - "value": "Bps" - }, - { - "id": "decimals", - "value": 2 - }, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ { - "id": "custom.align", + "color": "green", "value": null } ] }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 125 + }, + "id": 151, + "options": { + "graph": {}, + "legend": { + "calcs": [ + "max", + "lastNotNull" + ], + "displayMode": "table", + 
"placement": "right" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "7.5.5", + "targets": [ + { + "exemplar": true, + "expr": "(kubelet_volume_stats_used_bytes{} + on(persistentvolumeclaim) group_left(pod) kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~\"^$app.*\",namespace=~\"$namespace\"}) ", + "interval": "", + "intervalFactor": 1, + "legendFormat": {{ `"{{pod}}"` }}, + "refId": "A" + } + ], + "title": "Persistent Volume Usage History", + "type": "timeseries" + }, + {{- if $alerts }} + { + "alert": { + "alertRuleTags": {}, + "conditions": [ { - "matcher": { - "id": "byName", - "options": "pod" + "evaluator": { + "params": [ + 0.8 + ], + "type": "gt" }, - "properties": [ - { - "id": "displayName", - "value": "Pod" - }, - { - "id": "unit", - "value": "short" - }, - { - "id": "decimals", - "value": 2 - }, - { - "id": "links", - "value": [ - { - "targetBlank": false, - "title": "Drill down to pods", - "url": "./d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?var-datasource=$datasource&var-cluster=$cluster&var-namespace=$namespace&var-pod=$__cell" - } - ] - }, - { - "id": "custom.align", - "value": null - } - ] + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "1h", + "now" + ] + }, + "reducer": { + "params": [], + "type": "last" + }, + "type": "query" } - ] + ], + "executionErrorState": "alerting", + "for": "10s", + "frequency": "30s", + "handler": 1, + "name": "Volume Usage Percentage alert", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "description": "", + "fieldConfig": { + "defaults": { + "unit": "percentunit" + }, + "overrides": [] }, + "fill": 1, + "fillGradient": 0, "gridPos": { - "h": 7, + "h": 8, "w": 24, "x": 0, - "y": 77 + "y": 133 }, - "id": 130, + "hiddenSeries": false, + "id": 183, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + 
"rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "{{ .Values.grafana.version }}", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "exemplar": true, + "expr": "(kubelet_volume_stats_used_bytes / on(persistentvolumeclaim) group_left(pod) (kubelet_volume_stats_capacity_bytes + on(persistentvolumeclaim) group_left(pod) kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~\"^$app.*\",namespace=~\"$namespace\"}) )", + "interval": "", + "legendFormat": {{ `"{{pod}}"` }}, + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0.8, + "visible": true + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Volume Usage Percentage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + {{- end }} + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 141 + }, + "id": 144, + "panels": [], + "title": "Network Info", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fieldConfig": { + "defaults": { + "unit": "Bps" + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + 
"y": 142 + }, + "hiddenSeries": false, + "id": 132, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, "links": [], + "nullPointMode": "null", "options": { - "showHeader": true + "alertThreshold": true, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + }, + "tooltipOptions": { + "mode": "single" + } }, - "pluginVersion": "7.5.5", + "percentage": false, + "pluginVersion": "{{ .Values.grafana.version }}", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, "targets": [ { "exemplar": false, - "expr": "sum by(pod) (rate(container_fs_reads_total{container!=\"\", pod=~\"$app-\\\\d+$\",namespace=~\"$namespace\"}[5m]))", - "format": "table", - "instant": true, + "expr": "sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\", pod=~\"$app-\\\\d+$\"}[$__rate_interval])) by (pod)", + "format": "time_series", "interval": "", "intervalFactor": 2, - "legendFormat": "", + "legendFormat": {{ `"{{pod}}"` }}, + "legendLink": null, "refId": "A", "step": 10 - }, + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Receive Bandwidth", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ { - "exemplar": false, - "expr": "sum by(pod) (rate(container_fs_writes_total{container!=\"\", pod=~\"$app-\\\\d+$\",namespace=~\"$namespace\"}[5m]))", - "format": "table", - "instant": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "refId": "B", - "step": 10 + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true }, { - 
"exemplar": false, - "expr": "sum by(pod) (rate(container_fs_reads_total{container!=\"\", pod=~\"$app-\\\\d+$\",namespace=~\"$namespace\"}[5m]) + rate(container_fs_writes_total{container!=\"\",pod=~\"$app-\\\\d+$\",namespace=~\"$namespace\"}[5m]))", - "format": "table", - "instant": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "refId": "C", - "step": 10 + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fieldConfig": { + "defaults": { + "unit": "Bps" }, - { - "exemplar": false, - "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", pod=~\"$app-\\\\d+$\",namespace=~\"$namespace\"}[5m]))", - "format": "table", - "instant": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "refId": "D", - "step": 10 + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 142 + }, + "hiddenSeries": false, + "id": 134, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true, + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" }, - { - "exemplar": false, - "expr": "sum by(pod) (rate(container_fs_writes_bytes_total{container!=\"\", pod=~\"$app-\\\\d+$\",namespace=~\"$namespace\"}[5m]))", - "format": "table", - "instant": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "refId": "E", - "step": 10 + "tooltip": { + "mode": "single" }, + "tooltipOptions": { + "mode": "single" + } + }, + "percentage": false, + "pluginVersion": "{{ .Values.grafana.version }}", + "pointradius": 2, + "points": false, + 
"renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ { "exemplar": false, - "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", pod=~\"$app-\\\\d+$\",namespace=~\"$namespace\"}[5m]) + rate(container_fs_writes_bytes_total{container!=\"\",pod=~\"$app-\\\\d+$\",namespace=~\"$namespace\"}[5m]))", - "format": "table", - "instant": true, + "expr": "sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\", pod=~\"$app-\\\\d+$\"}[$__rate_interval])) by (pod)", + "format": "time_series", "interval": "", "intervalFactor": 2, - "legendFormat": "", - "refId": "F", + "legendFormat": {{ `"{{pod}}"` }}, + "legendLink": null, + "refId": "A", "step": 10 } ], + "thresholds": [], "timeFrom": null, + "timeRegions": [], "timeShift": null, - "title": "Current Storage IO", - "transformations": [ + "title": "Transmit Bandwidth", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ { - "id": "merge", - "options": { - "reducers": [] - } + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true } ], - "type": "table" - }, - { - "collapsed": false, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 84 - }, - "id": 149, - "panels": [], - "title": "Persistent Storage Insight", - "type": "row" - }, - { - "datasource": null, - "description": "This panel describes allocated persistent storage in a table format\n", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "align": "center", - "displayMode": "color-text", - "filterable": false - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - 
"color": "light-blue", - "value": null - } - ] - }, - "unit": "decbytes" - }, - "overrides": [] + "yaxis": { + "align": false, + "alignLevel": null + } + } + {{- if $alerts }}, + { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 0 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "1h", + "now" + ] + }, + "reducer": { + "params": [], + "type": "last" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "10s", + "frequency": "30s", + "handler": 1, + "name": "PG Exporter last scrape error alert", + "noDataState": "no_data", + "notifications": [] }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "fillGradient": 0, "gridPos": { - "h": 6, - "w": 12, + "h": 8, + "w": 6, "x": 0, - "y": 85 + "y": 149 }, - "id": 153, + "hiddenSeries": false, + "id": 165, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", "options": { - "showHeader": true + "alertThreshold": true }, - "pluginVersion": "7.5.5", + "percentage": false, + "pluginVersion": "{{ .Values.grafana.version }}", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "kubelet_volume_stats_capacity_bytes + on(persistentvolumeclaim) group_left(pod) kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~\"^$app.*\",namespace=~\"$namespace\"}", - "format": "table", - "instant": true, + "expr": "pg_exporter_last_scrape_error{job=\"$app-stats\",namespace=\"$namespace\"}", "interval": "", - "legendFormat": "", + "legendFormat": {{ `"{{pod}}"` }}, "refId": "A" } ], - "title": "Allocated Storage", - "transformations": [ + "thresholds": [ { - "id": "organize", - 
"options": { - "excludeByName": { - "Time": true, - "endpoint": true, - "instance": true, - "job": true, - "metrics_path": true, - "namespace": true, - "node": true, - "persistentvolumeclaim": true, - "service": true - }, - "indexByName": {}, - "renameByName": { - "Value": "Volume", - "pod": "Pod" - } - } + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0, + "visible": true } ], - "type": "table" + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "PG Exporter last scrape error", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { - "datasource": null, - "description": "This panel shows the persistent volume usage in percentage. 
", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percent" - }, - "overrides": [] + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 0.2 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "1h", + "now" + ] + }, + "reducer": { + "params": [], + "type": "last" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "10s", + "frequency": "30s", + "handler": 1, + "name": "Postgres High RollBack Rate alert", + "noDataState": "no_data", + "notifications": [] }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "fillGradient": 0, "gridPos": { - "h": 6, - "w": 12, - "x": 12, - "y": 85 + "h": 8, + "w": 6, + "x": 6, + "y": 149 }, - "id": 155, + "hiddenSeries": false, + "id": 167, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", "options": { - "displayMode": "gradient", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showUnfilled": true, - "text": {} + "alertThreshold": true }, - "pluginVersion": "7.5.5", + "percentage": false, + "pluginVersion": "8.0.7", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "(kubelet_volume_stats_used_bytes / on(persistentvolumeclaim) group_left(pod) (kubelet_volume_stats_capacity_bytes + on(persistentvolumeclaim) group_left(pod) 
kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~\"^$app.*\",namespace=~\"$namespace\"}) )* 100", - "instant": true, + "expr": "rate(pg_stat_database_xact_rollback{job=\"$app-stats\",namespace=\"$namespace\"}[3m]) / rate(pg_stat_database_xact_commit{job=\"$app-stats\",namespace=\"$namespace\"}[3m])", "interval": "", - "legendFormat": {{ `"{{pod}}"` }}, - "refId": "A" + "legendFormat": {{ `"{{pod}}, {{datname}}"` }}, + "refId": "A" } ], - "title": "Persistent Volume Usage", - "transformations": [], - "type": "bargauge" + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0.2, + "visible": true + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Postgres High RollBack Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { - "datasource": null, - "description": "This panel describes the persistent volume usage in a time series. 
", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "axisSoftMax": 5000, - "axisSoftMin": 0, - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "opacity", - "hideFrom": { - "graph": false, - "legend": false, - "tooltip": false - }, - "lineInterpolation": "linear", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + -1 + ], + "type": "gt" }, - "showPoints": "never", - "spanNulls": true - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "decbytes" - }, - "overrides": [] + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "1h", + "now" + ] + }, + "reducer": { + "params": [], + "type": "last" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "10s", + "frequency": "30s", + "handler": 1, + "name": "Postgres Too Many Connections alert", + "noDataState": "no_data", + "notifications": [] }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": null, + "fill": 1, + "fillGradient": 0, "gridPos": { "h": 8, - "w": 24, - "x": 0, - "y": 91 + "w": 6, + "x": 12, + "y": 149 }, - "id": 151, + "hiddenSeries": false, + "id": 169, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", "options": { - "graph": {}, - "legend": { - "calcs": [ - "max", - "lastNotNull" - ], - "displayMode": "table", - "placement": "right" - }, - "tooltipOptions": { - "mode": "single" - } + "alertThreshold": true }, - "pluginVersion": "7.5.5", + "percentage": false, + "pluginVersion": "{{ .Values.grafana.version 
}}", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, "targets": [ { "exemplar": true, - "expr": "(kubelet_volume_stats_used_bytes + on(persistentvolumeclaim) group_left(pod) kube_pod_spec_volumes_persistentvolumeclaims_info{pod=~\"^$app.*\",namespace=~\"$namespace\"}) ", + "expr": "sum by (pod) (pg_stat_activity_count{job=\"$app-stats\",namespace=\"$namespace\"}) - sum by (pod) (pg_settings_max_connections{job=\"$app-stats\",namespace=\"$namespace\"} * 80) / 100", "interval": "", - "intervalFactor": 1, - "legendFormat": {{ `"{{pod}}"` }}, + "legendFormat": {{ `"{{postgres}}"` }}, "refId": "A" } ], - "title": "Persistent Volume Usage History", - "type": "timeseries" - }, - { - "collapsed": false, - "datasource": "${datasource}", - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 99 + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": -1, + "visible": true + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Postgres Too Many Connections", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" }, - "id": 144, - "panels": [], - "title": "Network Info", - "type": "row" + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } }, { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 0.2 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "1h", + "now" + ] + }, + "reducer": { + "params": [], + "type": "last" + }, + "type": "query" + } + ], 
+ "executionErrorState": "alerting", + "for": "10s", + "frequency": "30s", + "handler": 1, + "name": "Postgresql Too Many Locks Acquired alert", + "noDataState": "no_data", + "notifications": [] + }, "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "unit": "Bps" - }, - "overrides": [] - }, + "datasource": null, "fill": 1, "fillGradient": 0, "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 100 + "h": 8, + "w": 6, + "x": 18, + "y": 149 }, "hiddenSeries": false, - "id": 132, + "id": 161, "legend": { "avg": false, "current": false, @@ -3096,24 +5000,12 @@ }, "lines": true, "linewidth": 1, - "links": [], "nullPointMode": "null", "options": { - "alertThreshold": true, - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - }, - "tooltipOptions": { - "mode": "single" - } + "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.5", + "pluginVersion": "{{ .Values.grafana.version }}", "pointradius": 2, "points": false, "renderer": "flot", @@ -3123,22 +5015,27 @@ "steppedLine": false, "targets": [ { - "exemplar": false, - "expr": "sum(irate(container_network_receive_bytes_total{namespace=~\"$namespace\", pod=~\"$app-\\\\d+$\"}[$__rate_interval])) by (pod)", - "format": "time_series", + "exemplar": true, + "expr": "((sum by (pod) (pg_locks_count{job=\"$app-stats\",namespace=\"$namespace\"})) / (sum by (pod) (pg_settings_max_locks_per_transaction{job=\"$app-stats\",namespace=\"$namespace\"}) * sum by (pod) (pg_settings_max_connections{job=\"$app-stats\",namespace=\"$namespace\"})))", "interval": "", - "intervalFactor": 2, - "legendFormat": {{ `"{{pod}}"` }}, - "legendLink": null, - "refId": "A", - "step": 10 + "legendFormat": {{ `"{{postgres}}"` }}, + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0.2, + "visible": true } ], -
"thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Receive Bandwidth", + "title": "Postgresql Too Many Locks Acquired", "tooltip": { "shared": true, "sort": 0, @@ -3154,7 +5051,7 @@ }, "yaxes": [ { - "format": "Bps", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -3176,56 +5073,76 @@ } }, { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 1 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "1h", + "now" + ] + }, + "reducer": { + "params": [], + "type": "last" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "10s", + "frequency": "30s", + "handler": 1, + "name": "Postgresql split brain alert", + "noDataState": "no_data", + "notifications": [] + }, "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, - "datasource": "${datasource}", - "fieldConfig": { - "defaults": { - "unit": "Bps" - }, - "overrides": [] - }, + "datasource": null, + "description": "More than one primary server online", "fill": 1, "fillGradient": 0, "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 100 + "h": 9, + "w": 24, + "x": 0, + "y": 157 }, "hiddenSeries": false, - "id": 134, + "id": 185, "legend": { + "alignAsTable": true, "avg": false, "current": false, "max": false, "min": false, + "rightSide": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, - "links": [], "nullPointMode": "null", "options": { - "alertThreshold": true, - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - }, - "tooltipOptions": { - "mode": "single" - } + "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.5.5", + "pluginVersion": "{{ .Values.grafana.version }}", "pointradius": 2, "points": false, "renderer": "flot", @@ -3235,22 +5152,27 @@ "steppedLine": false, "targets": [ { - "exemplar": false, - "expr":
"sum(irate(container_network_transmit_bytes_total{namespace=~\"$namespace\", pod=~\"$app-\\\\d+$\"}[$__rate_interval])) by (pod)", - "format": "time_series", + "exemplar": true, + "expr": "count by (pod) (pg_replication_is_replica{job=\"$app-stats\",namespace=\"$namespace\",service=\"$app-stats\"} == 0)", "interval": "", - "intervalFactor": 2, "legendFormat": {{ `"{{pod}}"` }}, - "legendLink": null, - "refId": "A", - "step": 10 + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 1, + "visible": true } ], - "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Transmit Bandwidth", + "title": "Postgresql split brain ", "tooltip": { "shared": true, "sort": 0, @@ -3266,7 +5188,7 @@ }, "yaxes": [ { - "format": "Bps", + "format": "short", "label": null, "logBase": 1, "max": null, @@ -3287,9 +5209,10 @@ "alignLevel": null } } + {{- end }} ], - "refresh": "5s", - "schemaVersion": 27, + "refresh": "", + "schemaVersion": 30, "style": "dark", "tags": [ "postgres", @@ -3318,81 +5241,84 @@ "regex": "", "skipUrlSync": false, "type": "datasource" - }, + } + {{- if not $alerts }} + , { "allValue": "\".+\"", "current": { - "selected": false, - "text": "demo", - "value": "demo" - }, - "datasource": "${datasource}", - "description": null, - "error": null, - "hide": 0, - "includeAll": false, - "label": "Namespace", - "multi": false, - "name": "namespace", - "options": [], - {{- if $shared }} - "query": { - "query": "label_values(kube_namespace_labels,namespace)", - "refId": "Prometheus-namespace-Variable-Query" - }, - "type": "query", - {{- else }} - "query": {{ $.Values.app.namespace | quote }}, - "type": "constant", - {{- end }} - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "useTags": false + "selected": false, + "text": "$namespace", + "value": "$namespace" }, - { - "allValue": null, - 
"current": { - "selected": false, - "text": "coreos-prom-postgres", - "value": "coreos-prom-postgres" - }, - "datasource": "${datasource}", - "description": null, - "error": null, - "hide": 0, - "includeAll": false, - "label": "postgres", - "multi": false, - "name": "app", - "options": [], - {{- if $shared }} - "query": { - "query": "query_result(kubedb_com_postgres_created{namespace=\"$namespace\"})", - "refId": "Prometheus-app-Variable-Query" - }, - "type": "query", - {{- else }} - "query": {{ $.Values.app.name | quote }}, - "type": "constant", - {{- end }} - "refresh": 1, - "regex": "/.*app=\"([^\"]+).*/", - "skipUrlSync": false, - "sort": 1, - "tagValuesQuery": "", - "tags": [], - "tagsQuery": "", - "useTags": false - } + "datasource": "${datasource}", + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "namespace", + "options": [], + {{- if $shared }} + "query": { + "query": "label_values(kube_namespace_labels,namespace)", + "refId": "Prometheus-namespace-Variable-Query" + }, + "type": "query", + {{- else }} + "query": {{ $.Values.app.namespace | quote }}, + "type": "constant", + {{- end }} + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": false, + "text": "coreos-prom-postgres", + "value": "coreos-prom-postgres" + }, + "datasource": "${datasource}", + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": "postgres", + "multi": false, + "name": "app", + "options": [], + {{- if $shared }} + "query": { + "query": "query_result(kubedb_com_postgres_created{namespace=\"$namespace\"})", + "refId": "Prometheus-app-Variable-Query" + }, + "type": "query", + {{- else }} + "query": {{ $.Values.app.name | quote }}, + "type": "constant", + {{- end }} + "refresh": 1, + "regex": "/.*app=\"([^\"]+).*/", + "skipUrlSync": 
false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "useTags": false + } + {{- end }} ] }, "time": { - "from": "now-6h", + "from": "now-1h", "to": "now" }, "timepicker": { @@ -3422,11 +5348,10 @@ }, "timezone": "", {{- if $shared }} - "title": "KubeDB / Postgres / Summary", +"title": "KubeDB / Postgres / Summary", {{- else }} "title": {{ printf "KubeDB / Postgres / Summary / %s / %s" $.Values.app.namespace $.Values.app.name | quote }}, {{- end }} - "uid": "VnOgk2Hnky", - "version": 2 -} - + "uid": "GWlOypcIk", + "version": 65 +} \ No newline at end of file