From 72469b93b526a6ae6daf70cb5d9e909bb0751248 Mon Sep 17 00:00:00 2001 From: Muzammil Date: Thu, 27 Jun 2024 15:33:33 +0500 Subject: [PATCH 1/3] feat: add cluster label in kafka, redis, mysql and mojaloop dashboards (#635) --- .../dashboard-mysql-exporter-quickstart.json | 4047 +++++++++++++++++ .../dashboard-redis-exporter-quickstart.json | 1420 ++++++ .../dashboard-kafka-topic-overview.json | 170 +- .../mojaloop/dashboard-NodeJSApplication.json | 86 +- .../mojaloop/dashboard-central-services.json | 203 +- ...dashboard-performance-troubleshooting.json | 117 +- .../mojaloop/dashboard-quoting-service.json | 98 +- .../mojaloop/dashboard-simulators.json | 624 +-- 8 files changed, 6215 insertions(+), 550 deletions(-) create mode 100644 monitoring/dashboards/datastore/dashboard-mysql-exporter-quickstart.json create mode 100644 monitoring/dashboards/datastore/dashboard-redis-exporter-quickstart.json diff --git a/monitoring/dashboards/datastore/dashboard-mysql-exporter-quickstart.json b/monitoring/dashboards/datastore/dashboard-mysql-exporter-quickstart.json new file mode 100644 index 000000000..143a979ab --- /dev/null +++ b/monitoring/dashboards/datastore/dashboard-mysql-exporter-quickstart.json @@ -0,0 +1,4047 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__elements": {}, + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "10.2.3" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph (old)", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "stat", + "name": "Stat", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 
1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "A quickstart to set up the Prometheus MySQL Exporter with preconfigured dashboards, alerting rules, and recording rules.", + "editable": true, + "fiscalYearStartMonth": 0, + "gnetId": 14057, + "graphTooltip": 1, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "datasource": { + "uid": "$datasource" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 382, + "panels": [], + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "refId": "A" + } + ], + "type": "row" + }, + { + "datasource": { + "uid": "$datasource" + }, + "description": "**Uptime**\n\nThe amount of time since the last restart of the MySQL server process.", + "fieldConfig": { + "defaults": { + "decimals": 1, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(245, 54, 54, 0.9)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 300 + }, + { + "color": "rgba(50, 172, 45, 0.97)", + "value": 3600 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 8, + "x": 0, + "y": 1 + }, + "id": 12, + "interval": "1m", + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "fieldOptions": { + "calcs": [ + "lastNotNull" + ] + }, + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.3", + "targets": [ + { + "calculatedInterval": "10m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "mysql_global_status_uptime{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "metric": "", + "refId": "A", 
+ "step": 300 + } + ], + "title": "Uptime", + "type": "stat" + }, + { + "datasource": { + "uid": "$datasource" + }, + "description": "**Current QPS**\n\nBased on the queries reported by MySQL's ``SHOW STATUS`` command, it is the number of statements executed by the server within the last second. This variable includes statements executed within stored programs, unlike the Questions variable. It does not count ``COM_PING`` or ``COM_STATISTICS`` commands.", + "fieldConfig": { + "defaults": { + "decimals": 2, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(245, 54, 54, 0.9)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 35 + }, + { + "color": "rgba(50, 172, 45, 0.97)", + "value": 75 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 8, + "x": 8, + "y": 1 + }, + "id": 13, + "interval": "1m", + "links": [ + { + "targetBlank": true, + "title": "MySQL Server Status Variables", + "url": "https://dev.mysql.com/doc/refman/5.7/en/server-status-variables.html#statvar_Queries" + } + ], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "fieldOptions": { + "calcs": [ + "lastNotNull" + ] + }, + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.3", + "targets": [ + { + "calculatedInterval": "10m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "rate(mysql_global_status_queries{job=~\"$job\", instance=~\"$instance\",cluster=\"$cluster\"}[$__interval])", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "metric": "", + "refId": "A", + "step": 20 + } + ], + "title": "Current QPS", + "type": "stat" + }, + { + "datasource": { + "uid": "$datasource" + }, + "description": 
"**InnoDB Buffer Pool Size**\n\nInnoDB maintains a storage area called the buffer pool for caching data and indexes in memory. Knowing how the InnoDB buffer pool works, and taking advantage of it to keep frequently accessed data in memory, is one of the most important aspects of MySQL tuning. The goal is to keep the working set in memory. In most cases, this should be between 60%-90% of available memory on a dedicated database host, but depends on many factors.", + "fieldConfig": { + "defaults": { + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 90 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 95 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 8, + "x": 16, + "y": 1 + }, + "id": 51, + "interval": "1m", + "links": [ + { + "targetBlank": true, + "title": "Tuning the InnoDB Buffer Pool Size", + "url": "https://www.percona.com/blog/2015/06/02/80-ram-tune-innodb_buffer_pool_size/" + } + ], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "fieldOptions": { + "calcs": [ + "lastNotNull" + ] + }, + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.3", + "targets": [ + { + "calculatedInterval": "10m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "mysql_global_variables_innodb_buffer_pool_size{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "metric": "", + "refId": "A", + "step": 300 + } + ], + "title": "InnoDB Buffer Pool", + "type": "stat" + }, + { + "collapsed": false, + "datasource": { + "uid": 
"$datasource" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 4 + }, + "id": 383, + "panels": [], + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "refId": "A" + } + ], + "title": "Connections", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "decimals": 0, + "description": "**Max Connections** \n\nMax Connections is the maximum permitted number of simultaneous client connections. By default, this is 151. Increasing this value increases the number of file descriptors that mysqld requires. If the required number of descriptors are not available, the server reduces the value of Max Connections.\n\nmysqld actually permits Max Connections + 1 clients to connect. The extra connection is reserved for use by accounts that have the SUPER privilege, such as root.\n\nMax Used Connections is the maximum number of connections that have been in use simultaneously since the server started.\n\nConnections is the number of connection attempts (successful or not) to the MySQL server.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 5 + }, + "height": "250px", + "hiddenSeries": false, + "id": 92, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": true, + "min": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [ + { + "targetBlank": true, + "title": "MySQL Server System Variables", + "url": "https://dev.mysql.com/doc/refman/5.7/en/server-system-variables.html#sysvar_max_connections" + } + ], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.3", + "pointradius": 5, + "points": false, + "renderer": 
"flot", + "seriesOverrides": [ + { + "alias": "Max Connections", + "fill": 0 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "sum(max_over_time(mysql_global_status_threads_connected{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[$__interval]))", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Connections", + "metric": "", + "refId": "A", + "step": 20 + }, + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "sum(mysql_global_status_max_used_connections{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"})", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Max Used Connections", + "metric": "", + "refId": "C", + "step": 20, + "target": "" + }, + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "sum(mysql_global_variables_max_connections{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"})", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Max Connections", + "metric": "", + "refId": "B", + "step": 20, + "target": "" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "MySQL Connections", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "label": "", + "logBase": 1, + "min": 0, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": 
false, + "datasource": { + "uid": "$datasource" + }, + "decimals": 2, + "description": "**MySQL Active Threads**\n\nThreads Connected is the number of open connections, while Threads Running is the number of threads not sleeping.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 5 + }, + "hiddenSeries": false, + "id": 10, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Peak Threads Running", + "color": "#E24D42", + "lines": false, + "pointradius": 1, + "points": true + }, + { + "alias": "Peak Threads Connected", + "color": "#1F78C1" + }, + { + "alias": "Avg Threads Running", + "color": "#EAB839" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "sum(max_over_time(mysql_global_status_threads_connected{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[$__interval]))", + "format": "time_series", + "hide": false, + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Peak Threads Connected", + "metric": "", + "refId": "A", + "step": 20 + }, + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "sum(max_over_time(mysql_global_status_threads_running{job=~\"$job\", instance=~\"$instance\", 
cluster=\"$cluster\"}[$__interval]))", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Peak Threads Running", + "metric": "", + "refId": "B", + "step": 20 + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(avg_over_time(mysql_global_status_threads_running{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[$__interval]))", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Avg Threads Running", + "refId": "C", + "step": 20 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "MySQL Client Thread Activity", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [ + "total" + ] + }, + "yaxes": [ + { + "format": "short", + "label": "Threads", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "label": "", + "logBase": 1, + "min": 0, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "collapsed": false, + "datasource": { + "uid": "$datasource" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 12 + }, + "id": 384, + "panels": [], + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "refId": "A" + } + ], + "title": "Table Locks", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "decimals": 2, + "description": "**MySQL Questions**\n\nThe number of statements executed by the server. This includes only statements sent to the server by clients and not statements executed within stored programs, unlike the Queries used in the QPS calculation. 
\n\nThis variable does not count the following commands:\n* ``COM_PING``\n* ``COM_STATISTICS``\n* ``COM_STMT_PREPARE``\n* ``COM_STMT_CLOSE``\n* ``COM_STMT_RESET``", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 13 + }, + "hiddenSeries": false, + "id": 53, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [ + { + "targetBlank": true, + "title": "MySQL Queries and Questions", + "url": "https://www.percona.com/blog/2014/05/29/how-mysql-queries-and-questions-are-measured/" + } + ], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "rate(mysql_global_status_questions{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[$__interval])", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "metric": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "MySQL Questions", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + } + ], + "yaxis": { + 
"align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "decimals": 2, + "description": "**MySQL Thread Cache**\n\nThe thread_cache_size variable sets how many threads the server should cache to reuse. When a client disconnects, the client's threads are put in the cache if the cache is not full. It is autosized in MySQL 5.6.8 and above (capped to 100). Requests for threads are satisfied by reusing threads taken from the cache if possible, and only when the cache is empty is a new thread created.\n\n* *Threads_created*: The number of threads created to handle connections.\n* *Threads_cached*: The number of threads in the thread cache.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 13 + }, + "hiddenSeries": false, + "id": 11, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [ + { + "title": "Tuning information", + "url": "https://dev.mysql.com/doc/refman/5.6/en/server-system-variables.html#sysvar_thread_cache_size" + } + ], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Threads Created", + "fill": 0 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "sum(mysql_global_variables_thread_cache_size{job=~\"$job\", instance=~\"$instance\", 
cluster=\"$cluster\"})", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Thread Cache Size", + "metric": "", + "refId": "B", + "step": 20 + }, + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "sum(mysql_global_status_threads_cached{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"})", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Threads Cached", + "metric": "", + "refId": "C", + "step": 20 + }, + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "sum(rate(mysql_global_status_threads_created{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[$__interval]))", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Threads Created", + "metric": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "MySQL Thread Cache", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "collapsed": false, + "datasource": { + "uid": "$datasource" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 20 + }, + "id": 385, + "panels": [], + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "refId": "A" + } + ], + "title": "Temporary Objects", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "decimals": 2, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "links": [] + 
}, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 21 + }, + "hiddenSeries": false, + "id": 22, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "sum(rate(mysql_global_status_created_tmp_tables{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[$__interval]))", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Created Tmp Tables", + "metric": "", + "refId": "A", + "step": 20 + }, + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "sum(rate(mysql_global_status_created_tmp_disk_tables{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[$__interval]))", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Created Tmp Disk Tables", + "metric": "", + "refId": "B", + "step": 20 + }, + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "sum(rate(mysql_global_status_created_tmp_files{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[$__interval]))", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Created Tmp Files", + "metric": "", + "refId": "C", + "step": 20 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "MySQL Temporary Objects", + 
"tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "decimals": 2, + "description": "**MySQL Select Types**\n\nAs with most relational databases, selecting based on indexes is more efficient than scanning an entire table's data. Here we see the counters for selects not done with indexes.\n\n* ***Select Scan*** is how many queries caused full table scans, in which all the data in the table had to be read and either discarded or returned.\n* ***Select Range*** is how many queries used a range scan, which means MySQL scanned all rows in a given range.\n* ***Select Full Join*** is the number of joins that are not joined on an index; this is usually a huge performance hit.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 21 + }, + "height": "250px", + "hiddenSeries": false, + "id": 311, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "hideZero": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + 
"targets": [ + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "sum(rate(mysql_global_status_select_full_join{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[$__interval]))", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Select Full Join", + "metric": "", + "refId": "A", + "step": 20 + }, + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "sum(rate(mysql_global_status_select_full_range_join{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[$__interval]))", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Select Full Range Join", + "metric": "", + "refId": "B", + "step": 20 + }, + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "sum(rate(mysql_global_status_select_range{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[$__interval]))", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Select Range", + "metric": "", + "refId": "C", + "step": 20 + }, + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "sum(rate(mysql_global_status_select_range_check{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[$__interval]))", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Select Range Check", + "metric": "", + "refId": "D", + "step": 20 + }, + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "sum(rate(mysql_global_status_select_scan{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[$__interval]))", + "format": "time_series", + "interval": "1m", + 
"intervalFactor": 1, + "legendFormat": "Select Scan", + "metric": "", + "refId": "E", + "step": 20 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "MySQL Select Types", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "collapsed": false, + "datasource": { + "uid": "$datasource" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 28 + }, + "id": 386, + "panels": [], + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "refId": "A" + } + ], + "title": "Sorts", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "decimals": 2, + "description": "**MySQL Sorts**\n\nDue to a query's structure, order, or other requirements, MySQL sorts the rows before returning them. 
For example, if a table is ordered 1 to 10 but you want the results reversed, MySQL then has to sort the rows to return 10 to 1.\n\nThis graph also shows when sorts had to scan a whole table or a given range of a table in order to return the results and which could not have been sorted via an index.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 29 + }, + "hiddenSeries": false, + "id": 30, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "hideZero": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "sum(rate(mysql_global_status_sort_rows{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[$__interval]))", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Sort Rows", + "metric": "", + "refId": "A", + "step": 20 + }, + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "sum(rate(mysql_global_status_sort_range{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[$__interval]))", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Sort Range", + "metric": "", + "refId": "B", + "step": 20 + }, + { + "calculatedInterval": "2m", + "datasource": { + 
"uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "sum(rate(mysql_global_status_sort_merge_passes{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[$__interval]))", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Sort Merge Passes", + "metric": "", + "refId": "C", + "step": 20 + }, + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "sum(rate(mysql_global_status_sort_scan{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[$__interval]))", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Sort Scan", + "metric": "", + "refId": "D", + "step": 20 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "MySQL Sorts", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "decimals": 2, + "description": "**MySQL Slow Queries**\n\nSlow queries are defined as queries being slower than the long_query_time setting. 
For example, if you have long_query_time set to 3, all queries that take longer than 3 seconds to complete will show on this graph.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 29 + }, + "hiddenSeries": false, + "id": 48, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": true, + "min": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "sum(rate(mysql_global_status_slow_queries{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[$__interval]))", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Slow Queries", + "metric": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "MySQL Slow Queries", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "label": "", + "logBase": 1, + "min": 0, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "collapsed": false, + "datasource": { + "uid": "$datasource" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 36 + }, + "id": 387, + "panels": 
[], + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "refId": "A" + } + ], + "title": "Aborted", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "decimals": 2, + "description": "**Aborted Connections**\n\nWhen a given host connects to MySQL and the connection is interrupted in the middle (for example due to bad credentials), MySQL keeps that info in a system table (since 5.6 this table is exposed in performance_schema).\n\nIf the amount of failed requests without a successful connection reaches the value of max_connect_errors, mysqld assumes that something is wrong and blocks the host from further connection.\n\nTo allow connections from that host again, you need to issue the ``FLUSH HOSTS`` statement.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 37 + }, + "hiddenSeries": false, + "id": 47, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": true, + "min": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "sum(rate(mysql_global_status_aborted_connects{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[$__interval]))", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Aborted Connects 
(attempts)", + "metric": "", + "refId": "A", + "step": 20 + }, + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "sum(rate(mysql_global_status_aborted_clients{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[$__interval]))", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Aborted Clients (timeout)", + "metric": "", + "refId": "B", + "step": 20, + "target": "" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "MySQL Aborted Connections", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "label": "", + "logBase": 1, + "min": 0, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "decimals": 2, + "description": "**Table Locks**\n\nMySQL takes a number of different locks for varying reasons. In this graph we see how many Table level locks MySQL has requested from the storage engine. In the case of InnoDB, many times the locks could actually be row locks as it only takes table level locks in a few specific cases.\n\nIt is most useful to compare Locks Immediate and Locks Waited. If Locks waited is rising, it means you have lock contention. 
Otherwise, Locks Immediate rising and falling is normal activity.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 37 + }, + "hiddenSeries": false, + "id": 32, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "sum(rate(mysql_global_status_table_locks_immediate{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[$__interval]))", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Table Locks Immediate", + "metric": "", + "refId": "A", + "step": 20 + }, + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "sum(rate(mysql_global_status_table_locks_waited{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[$__interval]))", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Table Locks Waited", + "metric": "", + "refId": "B", + "step": 20 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "MySQL Table Locks", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, 
+ "yaxes": [ + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "collapsed": false, + "datasource": { + "uid": "$datasource" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 44 + }, + "id": 388, + "panels": [], + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "refId": "A" + } + ], + "title": "Network", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "decimals": 2, + "description": "**MySQL Network Traffic**\n\nHere we can see how much network traffic is generated by MySQL. Outbound is network traffic sent from MySQL and Inbound is network traffic MySQL has received.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 6, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 45 + }, + "hiddenSeries": false, + "id": 9, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "sum(rate(mysql_global_status_bytes_received{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[$__interval]))", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + 
"legendFormat": "Inbound", + "metric": "", + "refId": "A", + "step": 20 + }, + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "sum(rate(mysql_global_status_bytes_sent{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[$__interval]))", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Outbound", + "metric": "", + "refId": "B", + "step": 20 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "MySQL Network Traffic", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "none", + "logBase": 1, + "min": 0, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "collapsed": false, + "datasource": { + "uid": "$datasource" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 52 + }, + "id": 389, + "panels": [], + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "refId": "A" + } + ], + "title": "Memory", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "decimals": 0, + "description": "***System Memory***: Total Memory for the system.\\\n***InnoDB Buffer Pool Data***: InnoDB maintains a storage area called the buffer pool for caching data and indexes in memory.\\\n***TokuDB Cache Size***: Similar in function to the InnoDB Buffer Pool, TokuDB will allocate 50% of the installed RAM for its own cache.\\\n***Key Buffer Size***: Index blocks for MYISAM tables are buffered and are shared by all threads. 
key_buffer_size is the size of the buffer used for index blocks.\\\n***Adaptive Hash Index Size***: When InnoDB notices that some index values are being accessed very frequently, it builds a hash index for them in memory on top of B-Tree indexes.\\\n ***Query Cache Size***: The query cache stores the text of a SELECT statement together with the corresponding result that was sent to the client. The query cache has huge scalability problems in that only one thread can do an operation in the query cache at the same time.\\\n***InnoDB Dictionary Size***: The data dictionary is InnoDB's internal catalog of tables. InnoDB stores the data dictionary on disk, and loads entries into memory while the server is running.\\\n***InnoDB Log Buffer Size***: The MySQL InnoDB log buffer allows transactions to run without having to write the log to disk before the transactions commit.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 6, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 53 + }, + "hiddenSeries": false, + "id": 50, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [ + { + "title": "Detailed descriptions about metrics", + "url": "https://www.percona.com/doc/percona-monitoring-and-management/dashboard.mysql-overview.html#mysql-internal-memory-overview" + } + ], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "System Memory", + "fill": 0, + "stack": false + } + ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": 
[ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(mysql_global_status_innodb_page_size{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"} * on (instance) mysql_global_status_buffer_pool_pages{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\", state=\"data\"})", + "format": "time_series", + "hide": false, + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "InnoDB Buffer Pool Data", + "refId": "A", + "step": 20 + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(mysql_global_variables_innodb_log_buffer_size{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"})", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "InnoDB Log Buffer Size", + "refId": "D", + "step": 20 + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(mysql_global_variables_innodb_additional_mem_pool_size{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"})", + "format": "time_series", + "interval": "1m", + "intervalFactor": 2, + "legendFormat": "InnoDB Additional Memory Pool Size", + "refId": "H", + "step": 40 + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(mysql_global_status_innodb_mem_dictionary{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"})", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "InnoDB Dictionary Size", + "refId": "F", + "step": 20 + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(mysql_global_variables_key_buffer_size{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"})", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Key Buffer Size", + "refId": "B", + "step": 20 + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(mysql_global_variables_query_cache_size{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"})", + "format": "time_series", + "interval": "1m", + 
"intervalFactor": 1, + "legendFormat": "Query Cache Size", + "refId": "C", + "step": 20 + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(mysql_global_status_innodb_mem_adaptive_hash{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"})", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Adaptive Hash Index Size", + "refId": "E", + "step": 20 + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(mysql_global_variables_tokudb_cache_size{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"})", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "TokuDB Cache Size", + "refId": "I", + "step": 20 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "MySQL Internal Memory Overview", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "collapsed": false, + "datasource": { + "uid": "$datasource" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 60 + }, + "id": 390, + "panels": [], + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "refId": "A" + } + ], + "title": "Command, Handlers, Processes", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "decimals": 2, + "description": "**Top Command Counters**\n\nThe Com_{{xxx}} statement counter variables indicate the number of times each xxx statement has been executed. There is one status variable for each type of statement. 
For example, Com_delete and Com_update count [``DELETE``](https://dev.mysql.com/doc/refman/5.7/en/delete.html) and [``UPDATE``](https://dev.mysql.com/doc/refman/5.7/en/update.html) statements, respectively. Com_delete_multi and Com_update_multi are similar but apply to [``DELETE``](https://dev.mysql.com/doc/refman/5.7/en/delete.html) and [``UPDATE``](https://dev.mysql.com/doc/refman/5.7/en/update.html) statements that use multiple-table syntax.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 61 + }, + "hiddenSeries": false, + "id": 14, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [ + { + "title": "Server Status Variables (Com_xxx)", + "url": "https://dev.mysql.com/doc/refman/5.7/en/server-status-variables.html#statvar_Com_xxx" + } + ], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "topk(5, rate(mysql_global_status_commands_total{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[$__interval])>0)", + "format": "time_series", + "hide": false, + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Com_{{ command }}", + "metric": "", + "refId": "B", + "step": 20 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Top Command Counters", + 
"tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "decimals": 2, + "description": "**MySQL Handlers**\n\nHandler statistics are internal statistics on how MySQL is selecting, updating, inserting, and modifying rows, tables, and indexes.\n\nThis is in fact the layer between the Storage Engine and MySQL.\n\n* `read_rnd_next` is incremented when the server performs a full table scan and this is a counter you don't really want to see with a high value.\n* `read_key` is incremented when a read is done with an index.\n* `read_next` is incremented when the storage engine is asked to 'read the next index entry'. 
A high value means a lot of index scans are being done.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 68 + }, + "hiddenSeries": false, + "id": 8, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "hideZero": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "rate(mysql_global_status_handlers_total{job=~\"$job\", instance=~\"$instance\", handler!~\"commit|rollback|savepoint.*|prepare\", cluster=\"$cluster\"}[$__interval]) or irate(mysql_global_status_handlers_total{job=~\"$job\", instance=~\"$instance\", handler!~\"commit|rollback|savepoint.*|prepare\", cluster=\"$cluster\"}[5m])", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "{{ handler }}", + "metric": "", + "refId": "J", + "step": 20 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "MySQL Handlers", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 
10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "decimals": 2, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 75 + }, + "hiddenSeries": false, + "id": 28, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "hideZero": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "rate(mysql_global_status_handlers_total{job=~\"$job\", instance=~\"$instance\", handler=~\"commit|rollback|savepoint.*|prepare\", cluster=\"$cluster\"}[$__interval]) or irate(mysql_global_status_handlers_total{job=~\"$job\", instance=~\"$instance\", handler=~\"commit|rollback|savepoint.*|prepare\", cluster=\"$cluster\"}[5m])", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "{{ handler }}", + "metric": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "MySQL Transaction Handlers", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + 
"dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "decimals": 2, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 0, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 82 + }, + "hiddenSeries": false, + "id": 40, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "mysql_info_schema_threads{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "{{ state }}", + "metric": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Process States", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "decimals": 2, + "editable": true, + "error": false, + "fieldConfig": { + 
"defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 6, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 89 + }, + "hiddenSeries": false, + "id": 49, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "hideZero": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "topk(5, avg_over_time(mysql_info_schema_threads{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[1h]))", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "{{ state }}", + "metric": "", + "refId": "A", + "step": 3600 + } + ], + "thresholds": [], + "timeFrom": "24h", + "timeRegions": [], + "title": "Top Process States Hourly", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "collapsed": false, + "datasource": { + "uid": "$datasource" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 96 + }, + "id": 391, + "panels": [], + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "refId": "A" + } + ], + "title": "Query Cache", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + 
"dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "decimals": 2, + "description": "**MySQL Query Cache Memory**\n\nThe query cache has huge scalability problems in that only one thread can do an operation in the query cache at the same time. This serialization is true not only for SELECTs, but also for INSERT/UPDATE/DELETE.\n\nThis also means that the larger the `query_cache_size` is set to, the slower those operations become. In concurrent environments, the MySQL Query Cache quickly becomes a contention point, decreasing performance. MariaDB and AWS Aurora have done work to try and eliminate the query cache contention in their flavors of MySQL, while MySQL 8.0 has eliminated the query cache feature.\n\nThe recommended settings for most environments is to set:\n ``query_cache_type=0``\n ``query_cache_size=0``\n\nNote that while you can dynamically change these values, to completely remove the contention point you have to restart the database.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 97 + }, + "hiddenSeries": false, + "id": 46, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "mysql_global_status_qcache_free_memory{job=~\"$job\", 
instance=~\"$instance\", cluster=\"$cluster\"}", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Free Memory", + "metric": "", + "refId": "F", + "step": 20 + }, + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "mysql_global_variables_query_cache_size{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Query Cache Size", + "metric": "", + "refId": "E", + "step": 20 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "MySQL Query Cache Memory", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "decimals": 2, + "description": "**MySQL Query Cache Activity**\n\nThe query cache has huge scalability problems in that only one thread can do an operation in the query cache at the same time. This serialization is true not only for SELECTs, but also for INSERT/UPDATE/DELETE.\n\nThis also means that the larger the `query_cache_size` is set to, the slower those operations become. In concurrent environments, the MySQL Query Cache quickly becomes a contention point, decreasing performance. 
MariaDB and AWS Aurora have done work to try and eliminate the query cache contention in their flavors of MySQL, while MySQL 8.0 has eliminated the query cache feature.\n\nThe recommended settings for most environments are:\n``query_cache_type=0``\n``query_cache_size=0``\n\nNote that while you can dynamically change these values, to completely remove the contention point you have to restart the database.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 97 + }, + "height": "", + "hiddenSeries": false, + "id": 45, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "rate(mysql_global_status_qcache_hits{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[$__interval]) or irate(mysql_global_status_qcache_hits{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[5m])", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Hits", + "metric": "", + "refId": "B", + "step": 20 + }, + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "rate(mysql_global_status_qcache_inserts{job=~\"$job\", instance=~\"$instance\",
cluster=\"$cluster\"}[$__interval]) or irate(mysql_global_status_qcache_inserts{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[5m])", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Inserts", + "metric": "", + "refId": "C", + "step": 20 + }, + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "rate(mysql_global_status_qcache_not_cached{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[$__interval]) or irate(mysql_global_status_qcache_not_cached{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[5m])", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Not Cached", + "metric": "", + "refId": "D", + "step": 20 + }, + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "rate(mysql_global_status_qcache_lowmem_prunes{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[$__interval]) or irate(mysql_global_status_qcache_lowmem_prunes{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[5m])", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Prunes", + "metric": "", + "refId": "F", + "step": 20 + }, + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "mysql_global_status_qcache_queries_in_cache{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Queries in Cache", + "metric": "", + "refId": "E", + "step": 20 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "MySQL Query Cache Activity", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + 
"values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "collapsed": false, + "datasource": { + "uid": "$datasource" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 104 + }, + "id": 392, + "panels": [], + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "refId": "A" + } + ], + "title": "Files and Tables", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "decimals": 2, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 105 + }, + "hiddenSeries": false, + "id": 43, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "rate(mysql_global_status_opened_files{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[$__interval]) or irate(mysql_global_status_opened_files{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[5m])", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Openings", + "metric": "", + "refId": "A", + "step": 20 + } + ], + "thresholds": [], + 
"timeRegions": [], + "title": "MySQL File Openings", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "decimals": 2, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 105 + }, + "hiddenSeries": false, + "id": 41, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "mysql_global_status_open_files{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Open Files", + "metric": "", + "refId": "A", + "step": 20 + }, + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "mysql_global_variables_open_files_limit{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}", 
+ "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Open Files Limit", + "metric": "", + "refId": "D", + "step": 20 + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "mysql_global_status_innodb_num_open_files{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "InnoDB Open Files", + "refId": "B", + "step": 20 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "MySQL Open Files", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "collapsed": false, + "datasource": { + "uid": "$datasource" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 112 + }, + "id": 393, + "panels": [], + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "refId": "A" + } + ], + "title": "Table Openings", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "decimals": 2, + "description": "**MySQL Table Open Cache Status**\n\nThe recommendation is to set the `table_open_cache_instances` to a loose correlation to virtual CPUs, keeping in mind that more instances means the cache is split more times. 
If you have a cache set to 500 but it has 10 instances, each cache will only have 50 cached.\n\nThe `table_definition_cache` and `table_open_cache` can be left as default as they are auto-sized in MySQL 5.6 and above (i.e., do not set them to any value).", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 113 + }, + "hiddenSeries": false, + "id": 44, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [ + { + "title": "Server Status Variables (table_open_cache)", + "url": "http://dev.mysql.com/doc/refman/5.6/en/server-system-variables.html#sysvar_table_open_cache" + } + ], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Table Open Cache Hit Ratio", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "rate(mysql_global_status_opened_tables{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[$__interval]) or irate(mysql_global_status_opened_tables{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[5m])", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Openings", + "metric": "", + "refId": "A", + "step": 20 + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "rate(mysql_global_status_table_open_cache_hits{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[$__interval])
or irate(mysql_global_status_table_open_cache_hits{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[5m])", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Hits", + "refId": "B", + "step": 20 + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "rate(mysql_global_status_table_open_cache_misses{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[$__interval]) or irate(mysql_global_status_table_open_cache_misses{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[5m])", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Misses", + "refId": "C", + "step": 20 + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "rate(mysql_global_status_table_open_cache_overflows{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[$__interval]) or irate(mysql_global_status_table_open_cache_overflows{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[5m])", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Misses due to Overflows", + "refId": "D", + "step": 20 + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "(rate(mysql_global_status_table_open_cache_hits{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[$__interval]) or irate(mysql_global_status_table_open_cache_hits{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[5m]))/((rate(mysql_global_status_table_open_cache_hits{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[$__interval]) or irate(mysql_global_status_table_open_cache_hits{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[5m]))+(rate(mysql_global_status_table_open_cache_misses{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[$__interval]) or irate(mysql_global_status_table_open_cache_misses{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[5m])))", + "format": "time_series", + "interval": "1m", + 
"intervalFactor": 1, + "legendFormat": "Table Open Cache Hit Ratio", + "refId": "E", + "step": 20 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "MySQL Table Open Cache Status", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "percentunit", + "logBase": 1, + "min": 0, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "decimals": 2, + "description": "**MySQL Open Tables**\n\nThe recommendation is to set the `table_open_cache_instances` to a loose correlation to virtual CPUs, keeping in mind that more instances means the cache is split more times. If you have a cache set to 500 but it has 10 instances, each cache will only have 50 cached.\n\nThe `table_definition_cache` and `table_open_cache` can be left as default as they are auto-sized MySQL 5.6 and above (ie: do not set them to any value).", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 113 + }, + "hiddenSeries": false, + "id": 42, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [ + { + "title": "Server Status Variables (table_open_cache)", + "url": "http://dev.mysql.com/doc/refman/5.6/en/server-system-variables.html#sysvar_table_open_cache" + } + ], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": 
false, + "pluginVersion": "10.2.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "mysql_global_status_open_tables{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Open Tables", + "metric": "", + "refId": "B", + "step": 20 + }, + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "mysql_global_variables_table_open_cache{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Table Open Cache", + "metric": "", + "refId": "C", + "step": 20 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "MySQL Open Tables", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "collapsed": false, + "datasource": { + "uid": "$datasource" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 120 + }, + "id": 394, + "panels": [], + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "refId": "A" + } + ], + "title": "MySQL Table Definition Cache", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "decimals": 2, + "description": "**MySQL Table Definition Cache**\n\nThe recommendation is to set the 
`table_open_cache_instances` to a loose correlation to virtual CPUs, keeping in mind that more instances means the cache is split more times. If you have a cache set to 500 but it has 10 instances, each cache will only have 50 cached.\n\nThe `table_definition_cache` and `table_open_cache` can be left as default as they are auto-sized in MySQL 5.6 and above (i.e., do not set them to any value).", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 2, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 121 + }, + "hiddenSeries": false, + "id": 54, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [ + { + "title": "Server Status Variables (table_open_cache)", + "url": "http://dev.mysql.com/doc/refman/5.6/en/server-system-variables.html#sysvar_table_open_cache" + } + ], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Opened Table Definitions", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr": "mysql_global_status_open_table_definitions{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Open Table Definitions", + "metric": "", + "refId": "B", + "step": 20 + }, + { + "calculatedInterval": "2m", + "datasource": { + "uid": "$datasource" + }, + "datasourceErrors": {}, + "errors": {}, + "expr":
"mysql_global_variables_table_definition_cache{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Table Definitions Cache Size", + "metric": "", + "refId": "C", + "step": 20 + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "rate(mysql_global_status_opened_table_definitions{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[$__interval]) or irate(mysql_global_status_opened_table_definitions{job=~\"$job\", instance=~\"$instance\", cluster=\"$cluster\"}[5m])", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Opened Table Definitions", + "refId": "A", + "step": 20 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "MySQL Table Definition Cache", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + } + ], + "yaxis": { + "align": false + } + } + ], + "refresh": "10s", + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [ + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "label_values(node_uname_info,cluster)", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "cluster", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(node_uname_info,cluster)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "Prometheus", + "value": "d876eea7-4966-4d75-84fc-eeadd6e97488" + }, + "hide": 0, + "includeAll": false, + "multi": false, + "name": "datasource", + 
"options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "current": {}, + "datasource": { + "uid": "$datasource" + }, + "definition": "label_values(mysql_up{cluster=\"$cluster\"},job)", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "job", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(mysql_up{cluster=\"$cluster\"},job)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": {}, + "datasource": { + "uid": "$datasource" + }, + "definition": "label_values(mysql_up{job=~\"$job\"},instance)", + "hide": 0, + "includeAll": true, + "multi": true, + "name": "instance", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(mysql_up{job=~\"$job\"},instance)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "collapse": false, + "enable": true, + "hidden": false, + "notice": false, + "now": true, + "refresh_intervals": [ + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "status": "Stable", + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ], + "type": "timepicker" + }, + "timezone": "", + "title": "MySQL Exporter Quickstart and Dashboard", + "uid": "549c2bf8936f7767ea6ac47c47b00f2a-v002", + "version": 1, + "weekStart": "" +} diff --git a/monitoring/dashboards/datastore/dashboard-redis-exporter-quickstart.json b/monitoring/dashboards/datastore/dashboard-redis-exporter-quickstart.json new file mode 100644 index 000000000..97a3f7b15 --- /dev/null +++ 
b/monitoring/dashboards/datastore/dashboard-redis-exporter-quickstart.json @@ -0,0 +1,1420 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__elements": {}, + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "10.2.3" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph (old)", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "A quickstart to setup the Prometheus Redis Exporter with preconfigured dashboards, alerting rules, and recording rules.", + "editable": true, + "fiscalYearStartMonth": 0, + "gnetId": 14091, + "graphTooltip": 1, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "57175393-0d42-4204-ae14-87edf79b3b1a" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 24, + "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "57175393-0d42-4204-ae14-87edf79b3b1a" + }, + "refId": "A" + } + ], + "title": "Performance", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "description": "Average taken across instances", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 1 + }, + "hiddenSeries": false, + "id": 18, + "legend": { + 
"avg": false, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "avg(irate(redis_commands_total{instance=~\"$instance\",cluster=\"$cluster\"} [1m])) by (cmd)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{cmd}}", + "metric": "redis_command_calls_total", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Commands per second", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "description": "Average taken across instances", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 1 + }, + "hiddenSeries": false, + "id": 20, + "legend": { + "avg": false, + "current": false, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "options": { 
+ "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "avg(irate(redis_commands_duration_seconds_total{instance=~\"$instance\",cluster=\"$cluster\"}[1m])) by (cmd)\n /\navg(irate(redis_commands_total{instance=~\"$instance\",cluster=\"$cluster\"}[1m])) by (cmd)\n", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ cmd }}", + "metric": "redis_command_calls_total", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Command latency per second", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": { + "Hit ratio": "blue" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "decimals": 2, + "description": "Hit rate shows the percentage of key space lookups that hit a key.", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 1 + }, + "hiddenSeries": false, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": true, + "pluginVersion": "10.2.3", + "pointradius": 5, + 
"points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/Target/", + "color": "#56A64B", + "dashes": true, + "fill": 0, + "hideTooltip": true, + "linewidth": 1 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "avg(irate(redis_keyspace_hits_total{instance=~\"$instance\",cluster=\"$cluster\"}[1m]) / (irate(redis_keyspace_misses_total{instance=~\"$instance\",cluster=\"$cluster\"}[1m]) + irate(redis_keyspace_hits_total{instance=~\"$instance\",cluster=\"$cluster\"}[1m]))) by (instance)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "metric": "", + "refId": "A", + "step": 240, + "target": "" + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "1", + "interval": "", + "legendFormat": "Target hit ratio for cache", + "refId": "B" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Hit ratio per instance", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percentunit", + "label": "", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "57175393-0d42-4204-ae14-87edf79b3b1a" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 22, + "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "57175393-0d42-4204-ae14-87edf79b3b1a" + }, + "refId": "A" + } + ], + "title": "Memory", + "type": "row" + }, + { + "aliasColors": { + "max": "#BF1B00" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "description": "Total taken 
across instances", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 9 + }, + "hiddenSeries": false, + "id": 7, + "legend": { + "avg": false, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/max/", + "color": "#E02F44", + "dashes": true, + "fill": 0, + "linewidth": 1 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(redis_memory_used_bytes{instance=~\"$instance\",cluster=\"$cluster\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Used Memory", + "metric": "", + "refId": "A", + "step": 240, + "target": "" + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(redis_memory_max_bytes{instance=~\"$instance\",cluster=\"$cluster\"})", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Configured max memory", + "refId": "B", + "step": 240 + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(redis_memory_used_rss_bytes{instance=~\"$instance\",cluster=\"$cluster\"})", + "interval": "", + "legendFormat": "Used RSS memory", + "refId": "C" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Total Memory Usage", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + 
"yaxes": [ + { + "format": "bytes", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": { + "Recommend restart redis": "red" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 9 + }, + "hiddenSeries": false, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/restart/", + "color": "#E02F44", + "dashes": true, + "fill": 0, + "linewidth": 1 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "redis_memory_fragmentation_ratio{instance=~\"$instance\",cluster=\"$cluster\"}", + "hide": false, + "interval": "", + "legendFormat": "{{instance}}", + "refId": "C" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Memory fragmentation ratio per instance", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": { + "Evictions": "red", + "evicts": "#890F02", + 
"memcached_items_evicted_total{instance=\"172.17.0.1:9150\",job=\"prometheus\"}": "#890F02", + "reclaims": "#3F6833", + "{container=\"redis-exporter\", instance=\"redis-86cb5d76d7-fcdln:redis-exporter:redis-metrics\", job=\"default/redis\", namespace=\"default\", pod=\"redis-86cb5d76d7-fcdln\"}": "red", + "{instance=\"redis-86cb5d76d7-fcdln:redis-exporter:redis-metrics\"}": "red" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 9 + }, + "hiddenSeries": false, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "reclaims", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "irate(redis_evicted_keys_total{instance=~\"$instance\",cluster=\"$cluster\"}[1m])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Key evictions per second per instance", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { 
+ "align": false + } + }, + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "57175393-0d42-4204-ae14-87edf79b3b1a" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 16 + }, + "id": 26, + "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "57175393-0d42-4204-ae14-87edf79b3b1a" + }, + "refId": "A" + } + ], + "title": "Basic activity", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "description": "Sum taken across instances", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 17 + }, + "hiddenSeries": false, + "id": 16, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.3", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(redis_connected_clients{instance=~\"$instance\",cluster=\"$cluster\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Connected", + "refId": "A" + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(redis_blocked_clients{instance=~\"$instance\",cluster=\"$cluster\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Blocked", + "refId": "B" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Connected/Blocked Clients", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": 
"time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": { + "db1": "yellow" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "description": "Sum taken across instances", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 17 + }, + "hiddenSeries": false, + "id": 5, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": false, + "hideZero": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum (redis_db_keys{instance=~\"$instance\",cluster=\"$cluster\"}) by (db)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ db }}", + "refId": "A", + "step": 240, + "target": "" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Total Items per DB", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + 
"bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "description": "Sum taken across instances", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 17 + }, + "hiddenSeries": false, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum (redis_db_keys{instance=~\"$instance\",cluster=\"$cluster\"}) - sum (redis_db_keys_expiring{instance=~\"$instance\",cluster=\"$cluster\"})", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Not expiring", + "refId": "A", + "step": 240, + "target": "" + }, + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(redis_db_keys_expiring{instance=~\"$instance\",cluster=\"$cluster\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Expiring", + "metric": "", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Expiring vs Not-Expiring Keys", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false 
+ } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "description": "This metric will only be non-zero if the instance is a master", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 24 + }, + "hiddenSeries": false, + "id": 28, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.3", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "sum(redis_connected_slaves{instance=~\"$instance\",cluster=\"$cluster\"}) by (instance)", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Connected slaves by instance", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "uid": "$datasource" + }, + "description": "This metric is only exported if the instance is a slave.", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 24 + }, + "hiddenSeries": false, + "id": 30, + "legend": { + "avg": false, + "current": 
false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "10.2.3", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "uid": "$datasource" + }, + "expr": "redis_master_last_io_seconds_ago{instance=~\"$instance\",cluster=\"$cluster\"}", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Time since last master connection", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "logBase": 1, + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + } + ], + "refresh": "", + "schemaVersion": 39, + "tags": [ + "prometheus", + "redis" + ], + "templating": { + "list": [ + { + "current": {}, + "definition": "label_values(node_uname_info,cluster)", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "cluster", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(node_uname_info,cluster)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "label_values(redis_up{cluster=\"$cluster\"},instance)", + "hide": 0, + "includeAll": false, + "multi": true, + "name": "instance", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(redis_up{cluster=\"$cluster\"},instance)", + "refId": 
"PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": true, + "text": "Prometheus", + "value": "57175393-0d42-4204-ae14-87edf79b3b1a" + }, + "hide": 0, + "includeAll": false, + "multi": false, + "name": "datasource", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + } + ] + }, + "time": { + "from": "now-12h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Redis Exporter Quickstart and Dashboard", + "uid": "bRd48yKMdd-v002", + "version": 1, + "weekStart": "" +} \ No newline at end of file diff --git a/monitoring/dashboards/messaging/dashboard-kafka-topic-overview.json b/monitoring/dashboards/messaging/dashboard-kafka-topic-overview.json index 3d0812538..eb5c7597c 100644 --- a/monitoring/dashboards/messaging/dashboard-kafka-topic-overview.json +++ b/monitoring/dashboards/messaging/dashboard-kafka-topic-overview.json @@ -23,12 +23,6 @@ "name": "Grafana", "version": "10.2.3" }, - { - "type": "panel", - "id": "graph", - "name": "Graph (old)", - "version": "" - }, { "type": "datasource", "id": "prometheus", @@ -173,7 +167,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum(kafka_consumergroup_lag{instance=\"$instance\",topic=~\"$topic\"}) by (consumergroup, topic) ", + "expr": "sum(kafka_consumergroup_lag{instance=\"$instance\",topic=~\"$topic\",cluster=~\"$cluster\"}) by (consumergroup, topic) ", "format": "time_series", "instant": false, "interval": "", @@ -292,7 +286,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": 
"sum(delta(kafka_topic_partition_current_offset{instance=~'$instance', topic=~\"$topic\"}[5m])/5) by (topic)", + "expr": "sum(delta(kafka_topic_partition_current_offset{instance=~'$instance', topic=~\"$topic\",cluster=~\"$cluster\"}[5m])/5) by (topic)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{topic}}", @@ -397,7 +391,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum(delta(kafka_consumergroup_current_offset{instance=~'$instance',topic=~\"$topic\"}[5m])/5) by (consumergroup, topic)", + "expr": "sum(delta(kafka_consumergroup_current_offset{instance=~'$instance',topic=~\"$topic\",cluster=~\"$cluster\"}[5m])/5) by (consumergroup, topic)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{topic}}/{{consumergroup}}", @@ -487,7 +481,7 @@ "calcs": [ "lastNotNull" ], - "displayMode": "list", + "displayMode": "table", "placement": "right", "showLegend": true }, @@ -509,7 +503,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(kafka_log_log_size{topic=~\"$topic\"}) by (topic)", + "expr": "sum(kafka_log_log_size{topic=~\"$topic\",cluster=~\"$cluster\"}) by (topic)", "instant": true, "legendFormat": "__auto", "range": false, @@ -533,58 +527,86 @@ "type": "row" }, { - "aliasColors": {}, - "bars": true, - "dashLength": 10, - "dashes": false, "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "fieldConfig": { "defaults": { - "links": [] + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "fillOpacity": 80, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineWidth": 1, + "scaleDistribution": { + "type": "linear" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + 
"color": "red", + "value": 80 + } + ] + } }, "overrides": [] }, - "fill": 1, - "fillGradient": 0, "gridPos": { - "h": 7, - "w": 20, + "h": 5, + "w": 24, "x": 0, "y": 21 }, - "hiddenSeries": false, "id": 8, - "legend": { - "alignAsTable": true, - "avg": false, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "sideWidth": 420, - "total": false, - "values": true - }, - "lines": false, - "linewidth": 1, "links": [], - "nullPointMode": "null", "options": { - "alertThreshold": true + "barRadius": 0, + "barWidth": 0.97, + "fullHighlight": false, + "groupWidth": 0.7, + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "orientation": "auto", + "showValue": "auto", + "stacking": "none", + "tooltip": { + "mode": "single", + "sort": "none" + }, + "xTickLabelRotation": 0, + "xTickLabelSpacing": 0 }, - "percentage": false, "pluginVersion": "10.2.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, "targets": [ { "datasource": { @@ -593,7 +615,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum by(topic) (kafka_topic_partitions{instance=\"$instance\",topic=~\"$topic\"})", + "expr": "sum by(topic) (kafka_topic_partitions{instance=\"$instance\",topic=~\"$topic\",cluster=~\"$cluster\"})", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -602,37 +624,8 @@ "refId": "A" } ], - "thresholds": [], - "timeRegions": [], "title": "Partitions per Topic", - "tooltip": { - "shared": false, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "mode": "series", - "show": false, - "values": [ - "current" - ] - }, - "yaxes": [ - { - "format": "short", - "logBase": 1, - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true - } - ], - "yaxis": { - "align": false - } + "type": "barchart" } ], "refresh": "", @@ 
-642,6 +635,29 @@ ], "templating": { "list": [ + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "label_values(node_uname_info,cluster)", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "cluster", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(node_uname_info,cluster)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, { "current": {}, "datasource": { @@ -743,7 +759,7 @@ }, "timezone": "browser", "title": "Kafka - Topic Overview", - "uid": "jwPKIsniow-05", + "uid": "jwPKIsniow-v006", "version": 1, "weekStart": "" } diff --git a/monitoring/dashboards/mojaloop/dashboard-NodeJSApplication.json b/monitoring/dashboards/mojaloop/dashboard-NodeJSApplication.json index 3216fcce8..5a9d36441 100644 --- a/monitoring/dashboards/mojaloop/dashboard-NodeJSApplication.json +++ b/monitoring/dashboards/mojaloop/dashboard-NodeJSApplication.json @@ -15,7 +15,7 @@ "type": "grafana", "id": "grafana", "name": "Grafana", - "version": "10.0.2" + "version": "10.2.3" }, { "type": "datasource", @@ -85,6 +85,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -98,6 +99,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -168,7 +170,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum(${prefix}_process_resident_memory_bytes{instance=~\"$instance\",kubernetes_pod_name=~\"$podName\", serviceName=~\"$serviceName\"}) by (instance, kubernetes_pod_name, serviceName) ", + "expr": "sum(${prefix}_process_resident_memory_bytes{instance=~\"$instance\",kubernetes_pod_name=~\"$podName\", serviceName=~\"$serviceName\", cluster=\"$cluster\"}) by (instance, kubernetes_pod_name, serviceName) ", "format": "time_series", 
"intervalFactor": 1, "legendFormat": "Process Memory - {{instance}} {{serviceName}} {{kubernetes_pod_name}}", @@ -181,7 +183,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum(${prefix}_nodejs_heap_size_total_bytes{instance=~\"$instance\",kubernetes_pod_name=~\"$podName\", serviceName=~\"$serviceName\"}) by (instance, kubernetes_pod_name, serviceName) ", + "expr": "sum(${prefix}_nodejs_heap_size_total_bytes{instance=~\"$instance\",kubernetes_pod_name=~\"$podName\", serviceName=~\"$serviceName\", cluster=\"$cluster\"}) by (instance, kubernetes_pod_name, serviceName) ", "format": "time_series", "intervalFactor": 1, "legendFormat": "Heap Total - {{instance}} {{serviceName}} {{kubernetes_pod_name}}", @@ -194,7 +196,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum(${prefix}_nodejs_heap_size_used_bytes{instance=~\"$instance\",kubernetes_pod_name=~\"$podName\", serviceName=~\"$serviceName\"}) by (instance, kubernetes_pod_name, serviceName) ", + "expr": "sum(${prefix}_nodejs_heap_size_used_bytes{instance=~\"$instance\",kubernetes_pod_name=~\"$podName\", serviceName=~\"$serviceName\", cluster=\"$cluster\"}) by (instance, kubernetes_pod_name, serviceName) ", "format": "time_series", "intervalFactor": 1, "legendFormat": "Heap Used - {{instance}} {{serviceName}} {{kubernetes_pod_name}}", @@ -207,7 +209,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum(${prefix}_nodejs_external_memory_bytes{instance=~\"$instance\",kubernetes_pod_name=~\"$podName\", serviceName=~\"$serviceName\"}) by (instance, kubernetes_pod_name, serviceName) ", + "expr": "sum(${prefix}_nodejs_external_memory_bytes{instance=~\"$instance\",kubernetes_pod_name=~\"$podName\", serviceName=~\"$serviceName\", cluster=\"$cluster\"}) by (instance, kubernetes_pod_name, serviceName) ", "format": "time_series", "intervalFactor": 1, "legendFormat": "External Memory - {{instance}} {{serviceName}} {{kubernetes_pod_name}}", @@ -229,6 +231,7 @@ "mode": 
"palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -242,6 +245,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -312,7 +316,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "avg(rate(${prefix}_process_cpu_user_seconds_total{instance=~\"$instance\",kubernetes_pod_name=~\"$podName\", serviceName=~\"$serviceName\"}[$__rate_interval]) * 100) by (instance, kubernetes_pod_name, serviceName) ", + "expr": "avg(rate(${prefix}_process_cpu_user_seconds_total{instance=~\"$instance\",kubernetes_pod_name=~\"$podName\", serviceName=~\"$serviceName\", cluster=\"$cluster\"}[$__rate_interval]) * 100) by (instance, kubernetes_pod_name, serviceName) ", "format": "time_series", "hide": false, "intervalFactor": 1, @@ -370,8 +374,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -418,7 +421,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum(${prefix}_nodejs_active_handles_total{instance=~\"$instance\",kubernetes_pod_name=~\"$podName\", serviceName=~\"$serviceName\"}) by (instance, serviceName, kubernetes_pod_name)", + "expr": "sum(${prefix}_nodejs_active_handles_total{instance=~\"$instance\",kubernetes_pod_name=~\"$podName\", serviceName=~\"$serviceName\", cluster=\"$cluster\"}) by (instance, serviceName, kubernetes_pod_name)", "format": "time_series", "intervalFactor": 1, "legendFormat": "Active Handler - {{instance}} {{serviceName}} {{kubernetes_pod_name}}", @@ -431,7 +434,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum(${prefix}_nodejs_active_requests_total{instance=~\"$instance\",kubernetes_pod_name=~\"$podName\", serviceName=~\"$serviceName\"}) by (instance, serviceName, kubernetes_pod_name)", + "expr": 
"sum(${prefix}_nodejs_active_requests_total{instance=~\"$instance\",kubernetes_pod_name=~\"$podName\", serviceName=~\"$serviceName\", cluster=\"$cluster\"}) by (instance, serviceName, kubernetes_pod_name)", "format": "time_series", "hide": false, "intervalFactor": 1, @@ -536,7 +539,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "avg(${prefix}_nodejs_eventloop_lag_seconds{instance=~\"$instance\",kubernetes_pod_name=~\"$podName\", serviceName=~\"$serviceName\"}) by (instance, serviceName, kubernetes_pod_name)", + "expr": "avg(${prefix}_nodejs_eventloop_lag_seconds{instance=~\"$instance\",kubernetes_pod_name=~\"$podName\", serviceName=~\"$serviceName\", cluster=\"$cluster\"}) by (instance, serviceName, kubernetes_pod_name)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{instance}} {{serviceName}} {{kubernetes_pod_name}}", @@ -641,7 +644,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum(${prefix}_nodejs_heap_space_size_used_bytes{instance=~\"$instance\",kubernetes_pod_name=~\"$podName\", serviceName=~\"$serviceName\"}) by (instance, serviceName, kubernetes_pod_name, space)", + "expr": "sum(${prefix}_nodejs_heap_space_size_used_bytes{instance=~\"$instance\",kubernetes_pod_name=~\"$podName\", serviceName=~\"$serviceName\", cluster=\"$cluster\"}) by (instance, serviceName, kubernetes_pod_name, space)", "format": "time_series", "intervalFactor": 1, "legendFormat": "Heap Used - {{instance}} {{serviceName}} {{kubernetes_pod_name}} - {{space}}", @@ -745,7 +748,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum(${prefix}_nodejs_heap_space_size_total_bytes{instance=~\"$instance\",kubernetes_pod_name=~\"$podName\", serviceName=~\"$serviceName\"}) by (instance, serviceName, kubernetes_pod_name, space)", + "expr": "sum(${prefix}_nodejs_heap_space_size_total_bytes{instance=~\"$instance\",kubernetes_pod_name=~\"$podName\", serviceName=~\"$serviceName\", cluster=\"$cluster\"}) by (instance, serviceName, 
kubernetes_pod_name, space)", "format": "time_series", "intervalFactor": 1, "legendFormat": "Heap Total - {{instance}} {{serviceName}} {{kubernetes_pod_name}} - {{space}}", @@ -850,7 +853,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "avg(${prefix}_nodejs_heap_space_size_available_bytes{instance=~\"$instance\",kubernetes_pod_name=~\"$podName\", serviceName=~\"$serviceName\"}) by (instance, serviceName, kubernetes_pod_name, space)", + "expr": "avg(${prefix}_nodejs_heap_space_size_available_bytes{instance=~\"$instance\",kubernetes_pod_name=~\"$podName\", serviceName=~\"$serviceName\", cluster=\"$cluster\"}) by (instance, serviceName, kubernetes_pod_name, space)", "format": "time_series", "intervalFactor": 1, "legendFormat": "Heap Used - {{instance}} {{serviceName}} {{kubernetes_pod_name}} - {{space}}", @@ -930,7 +933,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum(${prefix}_nodejs_version_info{instance=~\"$instance\",kubernetes_pod_name=~\"$podName\", serviceName=~\"$serviceName\"}) by (instance, kubernetes_pod_name, serviceName, version)", + "expr": "sum(${prefix}_nodejs_version_info{instance=~\"$instance\",kubernetes_pod_name=~\"$podName\", serviceName=~\"$serviceName\", cluster=\"$cluster\"}) by (instance, kubernetes_pod_name, serviceName, version)", "format": "time_series", "instant": false, "interval": "", @@ -1011,7 +1014,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum(changes(${prefix}_process_start_time_seconds{instance=~\"$instance\",kubernetes_pod_name=~\"$podName\", serviceName=~\"$serviceName\"}[$__rate_interval])) by (instance, serviceName, kubernetes_pod_name)", + "expr": "sum(changes(${prefix}_process_start_time_seconds{instance=~\"$instance\",kubernetes_pod_name=~\"$podName\", serviceName=~\"$serviceName\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, serviceName, kubernetes_pod_name)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{instance}}", @@ -1024,18 
+1027,36 @@ } ], "refresh": "10s", - "schemaVersion": 38, - "style": "dark", + "schemaVersion": 39, "tags": [ "nodejs" ], "templating": { "list": [ + { + "current": {}, + "definition": "label_values(node_uname_info,cluster)", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "cluster", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(node_uname_info,cluster)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, { "current": { - "selected": true, - "text": "cbs", - "value": "cbs" + "selected": false, + "text": "moja_cl", + "value": "moja_cl" }, "hide": 0, "includeAll": false, @@ -1044,7 +1065,7 @@ "name": "prefix", "options": [ { - "selected": false, + "selected": true, "text": "moja_cl", "value": "moja_cl" }, @@ -1059,7 +1080,7 @@ "value": "moja_als" }, { - "selected": true, + "selected": false, "text": "cbs", "value": "cbs" } @@ -1075,7 +1096,7 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "definition": "label_values(${prefix}_nodejs_version_info,instance)", + "definition": "label_values(${prefix}_nodejs_version_info{cluster=\"$cluster\"},instance)", "hide": 0, "includeAll": true, "label": "instance", @@ -1083,7 +1104,8 @@ "name": "instance", "options": [], "query": { - "query": "label_values(${prefix}_nodejs_version_info,instance)", + "qryType": 1, + "query": "label_values(${prefix}_nodejs_version_info{cluster=\"$cluster\"},instance)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -1101,7 +1123,7 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "definition": "label_values(${prefix}_nodejs_version_info,serviceName)", + "definition": "label_values(${prefix}_nodejs_version_info{cluster=\"$cluster\"},serviceName)", "hide": 0, "includeAll": true, "label": "serviceName", @@ -1109,7 +1131,8 @@ "name": "serviceName", "options": [], "query": { - "query": 
"label_values(${prefix}_nodejs_version_info,serviceName)", + "qryType": 1, + "query": "label_values(${prefix}_nodejs_version_info{cluster=\"$cluster\"},serviceName)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -1127,7 +1150,7 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "definition": "label_values(${prefix}_nodejs_version_info,kubernetes_pod_name)", + "definition": "label_values(${prefix}_nodejs_version_info{cluster=\"$cluster\"},kubernetes_pod_name)", "hide": 0, "includeAll": true, "label": "podName", @@ -1135,7 +1158,8 @@ "name": "podName", "options": [], "query": { - "query": "label_values(${prefix}_nodejs_version_info,kubernetes_pod_name)", + "qryType": 1, + "query": "label_values(${prefix}_nodejs_version_info{cluster=\"$cluster\"},kubernetes_pod_name)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -1180,7 +1204,7 @@ }, "timezone": "", "title": "NodeJS Application Dashboard", - "uid": "PTSqcpJWk", + "uid": "PTSqcpJWk-v001", "version": 1, "weekStart": "" -} +} \ No newline at end of file diff --git a/monitoring/dashboards/mojaloop/dashboard-central-services.json b/monitoring/dashboards/mojaloop/dashboard-central-services.json index c11dcab23..7921ad5bc 100644 --- a/monitoring/dashboards/mojaloop/dashboard-central-services.json +++ b/monitoring/dashboards/mojaloop/dashboard-central-services.json @@ -21,7 +21,7 @@ "type": "grafana", "id": "grafana", "name": "Grafana", - "version": "10.0.2" + "version": "10.2.3" }, { "type": "datasource", @@ -162,9 +162,10 @@ "fields": "", "values": false }, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "10.0.2", + "pluginVersion": "10.2.3", "targets": [ { "datasource": { @@ -172,7 +173,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum(rate(moja_cl_transfer_prepare_count{serviceName=~\"central-handler-prepare|central-service\"}[$__rate_interval]))", + "expr": 
"sum(rate(moja_cl_transfer_prepare_count{serviceName=~\"central-handler-prepare|central-service\", cluster=\"$cluster\"}[$__rate_interval]))", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -243,9 +244,10 @@ "fields": "", "values": false }, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "10.0.2", + "pluginVersion": "10.2.3", "targets": [ { "datasource": { @@ -253,7 +255,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum(rate(moja_cl_transfer_position_count{serviceName=~\"central-handler-position|central-service\"}[$__rate_interval]))", + "expr": "sum(rate(moja_cl_transfer_position_count{serviceName=~\"central-handler-position|central-service\", cluster=\"$cluster\"}[$__rate_interval]))", "format": "time_series", "instant": true, "intervalFactor": 1, @@ -323,9 +325,10 @@ "fields": "", "values": false }, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "10.0.2", + "pluginVersion": "10.2.3", "targets": [ { "datasource": { @@ -333,7 +336,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum(rate(moja_cl_transfer_fulfil_count{serviceName=~\"central-handler-fulfil|central-service\"}[$__rate_interval]))", + "expr": "sum(rate(moja_cl_transfer_fulfil_count{serviceName=~\"central-handler-fulfil|central-service\", cluster=\"$cluster\"}[$__rate_interval]))", "format": "time_series", "instant": true, "interval": "", @@ -382,6 +385,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -395,6 +399,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -464,7 +469,7 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "expr": "sum(kafka_consumergroup_lag{topic=~\"topic-.*\", consumergroup=~\"cl-group-.*\"}) by (consumergroup, topic)", + "expr": 
"sum(kafka_consumergroup_lag{topic=~\"topic-.*\", consumergroup=~\"cl-group-.*\", cluster=\"$cluster\"}) by (consumergroup, topic)", "format": "time_series", "hide": false, "instant": false, @@ -514,6 +519,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -527,6 +533,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -598,7 +605,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum(irate(moja_cl_transfer_prepare_count{serviceName=~\"central-handler-prepare|central-service\"}[$__rate_interval])) by (app_kubernetes_io_name, success, instance)", + "expr": "sum(irate(moja_cl_transfer_prepare_count{serviceName=~\"central-handler-prepare|central-service\", cluster=\"$cluster\"}[$__rate_interval])) by (app_kubernetes_io_name, success, instance)", "format": "time_series", "hide": false, "interval": "", @@ -613,7 +620,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum(irate(moja_cl_transfer_position_count{serviceName=~\"central-handler-position|central-service\"}[$__rate_interval])) by (app_kubernetes_io_name, action, success, instance)", + "expr": "sum(irate(moja_cl_transfer_position_count{serviceName=~\"central-handler-position|central-service\", cluster=\"$cluster\"}[$__rate_interval])) by (app_kubernetes_io_name, action, success, instance)", "format": "time_series", "hide": false, "intervalFactor": 1, @@ -627,7 +634,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum(irate(moja_cl_transfer_position_count{serviceName=~\"central-handler-position|central-service\"}[$__rate_interval])) by (app_kubernetes_io_name, success, instance)", + "expr": "sum(irate(moja_cl_transfer_position_count{serviceName=~\"central-handler-position|central-service\", cluster=\"$cluster\"}[$__rate_interval])) by (app_kubernetes_io_name, success, instance)", "hide": false, 
"legendFormat": "{{instance}} {{app_kubernetes_io_name}} Avg - success:{{success}}", "range": true, @@ -639,7 +646,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum(irate(moja_cl_transfer_fulfil_count{serviceName=~\"central-handler-fulfil|central-service\"}[$__rate_interval])) by (app_kubernetes_io_name, success, instance)", + "expr": "sum(irate(moja_cl_transfer_fulfil_count{serviceName=~\"central-handler-fulfil|central-service\", cluster=\"$cluster\"}[$__rate_interval])) by (app_kubernetes_io_name, success, instance)", "format": "time_series", "hide": false, "intervalFactor": 1, @@ -653,7 +660,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum(irate(moja_cl_transfer_position_count{serviceName=~\"central-handler-position|central-service\"}[$__rate_interval])) by (success, instance)", + "expr": "sum(irate(moja_cl_transfer_position_count{serviceName=~\"central-handler-position|central-service\", cluster=\"$cluster\"}[$__rate_interval])) by (success, instance)", "format": "time_series", "hide": true, "intervalFactor": 1, @@ -667,7 +674,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum(irate(moja_cl_transfer_prepare_count{serviceName=~\"central-handler-prepare|central-service\"}[$__rate_interval])) by (kubernetes_pod_name, success, instance)", + "expr": "sum(irate(moja_cl_transfer_prepare_count{serviceName=~\"central-handler-prepare|central-service\", cluster=\"$cluster\"}[$__rate_interval])) by (kubernetes_pod_name, success, instance)", "hide": true, "legendFormat": "{{instance}} {{kubernetes_pod_name}} - success:{{success}}", "range": true, @@ -679,7 +686,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum(irate(moja_cl_transfer_fulfil_count{serviceName=~\"central-handler-fulfil|central-service\"}[$__rate_interval])) by (kubernetes_pod_name, success, instance)", + "expr": "sum(irate(moja_cl_transfer_fulfil_count{serviceName=~\"central-handler-fulfil|central-service\", 
cluster=\"$cluster\"}[$__rate_interval])) by (kubernetes_pod_name, success, instance)", "hide": true, "legendFormat": "{{instance}} {{kubernetes_pod_name}} - success:{{success}}", "range": true, @@ -691,7 +698,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum(irate(moja_cl_transfer_position_count{serviceName=~\"central-handler-position|central-service\"}[$__rate_interval])) by (kubernetes_pod_name, success, instance)", + "expr": "sum(irate(moja_cl_transfer_position_count{serviceName=~\"central-handler-position|central-service\", cluster=\"$cluster\"}[$__rate_interval])) by (kubernetes_pod_name, success, instance)", "hide": true, "legendFormat": "{{instance}} {{kubernetes_pod_name}} - success:{{success}}", "range": true, @@ -703,7 +710,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum(irate(moja_cl_transfer_prepare_count{serviceName=\"ml-service\"}[$__rate_interval])) by (app, success, instance)", + "expr": "sum(irate(moja_cl_transfer_prepare_count{serviceName=\"ml-service\", cluster=\"$cluster\"}[$__rate_interval])) by (app, success, instance)", "hide": true, "legendFormat": "ML Transfers API Prepare - success:{{success}}", "range": true, @@ -724,6 +731,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -737,6 +745,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -760,8 +769,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -850,7 +858,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "avg(rate(moja_cl_transfer_prepare_sum{serviceName=~\"central-handler-prepare|central-service\"}[$__rate_interval]) / rate(moja_cl_transfer_prepare_count{serviceName=~\"central-handler-prepare|central-service\"}[$__rate_interval]) >=0) by (kubernetes_pod_name, success, instance)", + "expr": 
"avg(rate(moja_cl_transfer_prepare_sum{serviceName=~\"central-handler-prepare|central-service\", cluster=\"$cluster\"}[$__rate_interval]) / rate(moja_cl_transfer_prepare_count{serviceName=~\"central-handler-prepare|central-service\", cluster=\"$cluster\"}[$__rate_interval]) >=0) by (kubernetes_pod_name, success, instance)", "format": "time_series", "hide": false, "intervalFactor": 1, @@ -864,7 +872,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "avg(rate(moja_cl_transfer_position_sum{serviceName=~\"central-handler-position|central-service\"}[$__rate_interval]) / rate(moja_cl_transfer_position_count{serviceName=~\"central-handler-position|central-service\"}[$__rate_interval]) >=0) by (kubernetes_pod_name, success, instance)", + "expr": "avg(rate(moja_cl_transfer_position_sum{serviceName=~\"central-handler-position|central-service\", cluster=\"$cluster\"}[$__rate_interval]) / rate(moja_cl_transfer_position_count{serviceName=~\"central-handler-position|central-service\", cluster=\"$cluster\"}[$__rate_interval]) >=0) by (kubernetes_pod_name, success, instance)", "format": "time_series", "hide": false, "intervalFactor": 1, @@ -878,7 +886,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "avg(rate(moja_cl_transfer_fulfil_sum{serviceName=~\"central-handler-fulfil|central-service\"}[$__rate_interval]) / rate(moja_cl_transfer_fulfil_count{serviceName=~\"central-handler-fulfil|central-service\"}[$__rate_interval]) >=0) by (kubernetes_pod_name, success, instance)", + "expr": "avg(rate(moja_cl_transfer_fulfil_sum{serviceName=~\"central-handler-fulfil|central-service\", cluster=\"$cluster\"}[$__rate_interval]) / rate(moja_cl_transfer_fulfil_count{serviceName=~\"central-handler-fulfil|central-service\", cluster=\"$cluster\"}[$__rate_interval]) >=0) by (kubernetes_pod_name, success, instance)", "hide": false, "legendFormat": "Transfer Fulfil - {{kubernetes_pod_name}} {{instance}} success:{{success}}", "range": true, @@ -943,8 +951,7 @@ "mode": 
"absolute", "steps": [ { - "color": "#299c46", - "value": null + "color": "#299c46" }, { "color": "rgba(237, 129, 40, 0.89)", @@ -970,6 +977,8 @@ "links": [], "maxDataPoints": 100, "options": { + "minVizHeight": 200, + "minVizWidth": 200, "orientation": "horizontal", "reduceOptions": { "calcs": [ @@ -979,9 +988,10 @@ "values": false }, "showThresholdLabels": false, - "showThresholdMarkers": true + "showThresholdMarkers": true, + "sizing": "auto" }, - "pluginVersion": "10.0.2", + "pluginVersion": "10.2.3", "targets": [ { "datasource": { @@ -989,7 +999,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum (irate (container_cpu_usage_seconds_total{pod_name=~'.*centralledger.*'}[2m])) / sum (machine_cpu_cores{}) * 100", + "expr": "sum (irate (container_cpu_usage_seconds_total{pod_name=~'.*centralledger.*', cluster=\"$cluster\"}[2m])) / sum (machine_cpu_cores{cluster=\"$cluster\"}) * 100", "format": "time_series", "hide": true, "instant": true, @@ -1002,7 +1012,7 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "expr": "sum (irate (container_cpu_usage_seconds_total{pod_name=~'.*centralledger.*'}[2m]))", + "expr": "sum (irate (container_cpu_usage_seconds_total{pod_name=~'.*centralledger.*', cluster=\"$cluster\"}[2m]))", "refId": "B" } ], @@ -1038,8 +1048,7 @@ "mode": "absolute", "steps": [ { - "color": "#299c46", - "value": null + "color": "#299c46" }, { "color": "rgba(237, 129, 40, 0.89)", @@ -1065,6 +1074,8 @@ "links": [], "maxDataPoints": 100, "options": { + "minVizHeight": 200, + "minVizWidth": 200, "orientation": "horizontal", "reduceOptions": { "calcs": [ @@ -1074,16 +1085,17 @@ "values": false }, "showThresholdLabels": false, - "showThresholdMarkers": true + "showThresholdMarkers": true, + "sizing": "auto" }, - "pluginVersion": "10.0.2", + "pluginVersion": "10.2.3", "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "expr": "sum (irate 
(container_cpu_usage_seconds_total{pod_name=~'.*centralledger-handler-transfer-prepare.*'}[2m])) / sum (machine_cpu_cores{}) * 100", + "expr": "sum (irate (container_cpu_usage_seconds_total{pod_name=~'.*centralledger-handler-transfer-prepare.*', cluster=\"$cluster\"}[2m])) / sum (machine_cpu_cores{cluster=\"$cluster\"}) * 100", "format": "time_series", "hide": true, "instant": true, @@ -1096,7 +1108,7 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "expr": "sum (irate (container_cpu_usage_seconds_total{pod_name=~'.*centralledger-handler-transfer-prepare.*'}[2m]))", + "expr": "sum (irate (container_cpu_usage_seconds_total{pod_name=~'.*centralledger-handler-transfer-prepare.*', cluster=\"$cluster\"}[2m]))", "refId": "B" } ], @@ -1132,8 +1144,7 @@ "mode": "absolute", "steps": [ { - "color": "#299c46", - "value": null + "color": "#299c46" }, { "color": "rgba(237, 129, 40, 0.89)", @@ -1159,6 +1170,8 @@ "links": [], "maxDataPoints": 100, "options": { + "minVizHeight": 200, + "minVizWidth": 200, "orientation": "horizontal", "reduceOptions": { "calcs": [ @@ -1168,16 +1181,17 @@ "values": false }, "showThresholdLabels": false, - "showThresholdMarkers": true + "showThresholdMarkers": true, + "sizing": "auto" }, - "pluginVersion": "10.0.2", + "pluginVersion": "10.2.3", "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "expr": "sum (irate (container_cpu_usage_seconds_total{pod_name=~'.*centralledger-handler-transfer-position.*'}[2m])) / sum (machine_cpu_cores{}) * 100", + "expr": "sum (irate (container_cpu_usage_seconds_total{pod_name=~'.*centralledger-handler-transfer-position.*', cluster=\"$cluster\"}[2m])) / sum (machine_cpu_cores{cluster=\"$cluster\"}) * 100", "format": "time_series", "hide": true, "instant": true, @@ -1190,7 +1204,7 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "expr": "sum (irate (container_cpu_usage_seconds_total{pod_name=~'.*centralledger-handler-transfer-position.*'}[2m]))", + "expr": "sum (irate 
(container_cpu_usage_seconds_total{pod_name=~'.*centralledger-handler-transfer-position.*', cluster=\"$cluster\"}[2m]))", "refId": "B" } ], @@ -1226,8 +1240,7 @@ "mode": "absolute", "steps": [ { - "color": "#299c46", - "value": null + "color": "#299c46" }, { "color": "rgba(237, 129, 40, 0.89)", @@ -1253,6 +1266,8 @@ "links": [], "maxDataPoints": 100, "options": { + "minVizHeight": 200, + "minVizWidth": 200, "orientation": "horizontal", "reduceOptions": { "calcs": [ @@ -1262,16 +1277,17 @@ "values": false }, "showThresholdLabels": false, - "showThresholdMarkers": true + "showThresholdMarkers": true, + "sizing": "auto" }, - "pluginVersion": "10.0.2", + "pluginVersion": "10.2.3", "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "expr": "sum (irate (container_cpu_usage_seconds_total{pod_name=~'.*centralledger-handler-transfer-fulfil.*'}[2m])) / sum (machine_cpu_cores{}) * 100", + "expr": "sum (irate (container_cpu_usage_seconds_total{pod_name=~'.*centralledger-handler-transfer-fulfil.*', cluster=\"$cluster\"}[2m])) / sum (machine_cpu_cores{ cluster=\"$cluster\"}) * 100", "format": "time_series", "hide": true, "instant": true, @@ -1284,7 +1300,7 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "expr": "sum (irate (container_cpu_usage_seconds_total{pod_name=~'.*centralledger-handler-transfer-fulfil.*'}[2m]))", + "expr": "sum (irate (container_cpu_usage_seconds_total{pod_name=~'.*centralledger-handler-transfer-fulfil.*', cluster=\"$cluster\"}[2m]))", "refId": "B" } ], @@ -1328,6 +1344,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1341,6 +1358,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1364,8 +1382,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1452,7 +1469,7 @@ "uid": 
"${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "moja_cl_nodejs_heap_space_size_available_bytes{serviceName=~\"central-handler.*|central-service\"}", + "expr": "moja_cl_nodejs_heap_space_size_available_bytes{serviceName=~\"central-handler.*|central-service\", cluster=\"$cluster\"}", "hide": false, "legendFormat": "heap_space_free-{{kubernetes_pod_name}} {{serviceName}} {{space}}", "range": true, @@ -1464,7 +1481,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "moja_cl_nodejs_heap_space_size_used_bytes{serviceName=~\"central-handler.*|central-service\"}", + "expr": "moja_cl_nodejs_heap_space_size_used_bytes{serviceName=~\"central-handler.*|central-service\", cluster=\"$cluster\"}", "hide": false, "legendFormat": " heap_space_used-{{kubernetes_pod_name}} {{serviceName}} {{space}} ", "range": true, @@ -1476,7 +1493,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "moja_cl_nodejs_heap_size_total_bytes{serviceName=~\"central-handler.*|central-service\"}", + "expr": "moja_cl_nodejs_heap_size_total_bytes{serviceName=~\"central-handler.*|central-service\", cluster=\"$cluster\"}", "hide": false, "legendFormat": "heap_total-{{kubernetes_pod_name}} {{serviceName}}", "range": true, @@ -1488,7 +1505,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "moja_cl_nodejs_heap_size_used_bytes{serviceName=~\"central-handler.*|central-service\"}", + "expr": "moja_cl_nodejs_heap_size_used_bytes{serviceName=~\"central-handler.*|central-service\", cluster=\"$cluster\"}", "hide": false, "legendFormat": " heap_used-{{kubernetes_pod_name}} {{serviceName}}", "range": true, @@ -1500,7 +1517,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "moja_cl_nodejs_external_memory_bytes{serviceName=~\"central-handler.*|central-service\"}", + "expr": "moja_cl_nodejs_external_memory_bytes{serviceName=~\"central-handler.*|central-service\", cluster=\"$cluster\"}", "hide": false, "legendFormat": "ext_memory-{{kubernetes_pod_name}} 
{{serviceName}}", "range": true, @@ -1522,6 +1539,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1535,6 +1553,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1557,8 +1576,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1644,7 +1662,7 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "expr": "sum (irate (moja_cl_process_cpu_seconds_total{serviceName=~\"central.*\"}[$__rate_interval])) by (app)", + "expr": "sum (irate (moja_cl_process_cpu_seconds_total{serviceName=~\"central.*\", cluster=\"$cluster\"}[$__rate_interval])) by (app)", "format": "time_series", "hide": true, "intervalFactor": 1, @@ -1656,7 +1674,7 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "expr": "sum(\n irate(container_cpu_usage_seconds_total{pod_name=~'.*centralledger-handler-transfer-.*|.*centralledger-service.*'}[$__rate_interval]))\nby (pod_name)", + "expr": "sum(\n irate(container_cpu_usage_seconds_total{pod_name=~'.*centralledger-handler-transfer-.*|.*centralledger-service.*', cluster=\"$cluster\"}[$__rate_interval]))\nby (pod_name)", "format": "time_series", "hide": true, "instant": false, @@ -1669,7 +1687,7 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "expr": "sum (irate (container_cpu_usage_seconds_total{pod_name=~'.*centralledger-handler-transfer-.*|.*centralledger-service.*'}[$__rate_interval]))", + "expr": "sum (irate (container_cpu_usage_seconds_total{pod_name=~'.*centralledger-handler-transfer-.*|.*centralledger-service.*', cluster=\"$cluster\"}[$__rate_interval]))", "format": "time_series", "hide": true, "instant": false, @@ -1682,7 +1700,7 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "expr": "sum (rate (container_cpu_usage_seconds_total{namespace='test', 
pod_name=~'test-centralledger-hand.*|test-centralledger-serv.*|test-centralledger-handler-transfer-fulfil.*'}[1m])) by (instance)", + "expr": "sum (rate (container_cpu_usage_seconds_total{namespace='test', pod_name=~'test-centralledger-hand.*|test-centralledger-serv.*|test-centralledger-handler-transfer-fulfil.*', cluster=\"$cluster\"}[1m])) by (instance)", "format": "time_series", "hide": true, "intervalFactor": 1, @@ -1693,7 +1711,7 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "expr": "sum (irate (container_cpu_usage_seconds_total{}[1m])) by (instance)", + "expr": "sum (irate (container_cpu_usage_seconds_total{ cluster=\"$cluster\"}[1m])) by (instance)", "format": "time_series", "hide": true, "intervalFactor": 1, @@ -1704,7 +1722,7 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "expr": "irate(node_cpu_seconds_total{}[5m]})", + "expr": "irate(node_cpu_seconds_total{cluster=\"$cluster\"}[5m])", "format": "time_series", "hide": true, "intervalFactor": 1, @@ -1716,7 +1734,7 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "expr": "kube_node_status_capacity_cpu_cores", + "expr": "kube_node_status_capacity_cpu_cores{cluster=\"$cluster\"}", "format": "time_series", "hide": true, "intervalFactor": 1, @@ -1727,7 +1745,7 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "expr": "kube_node_status_allocatable_cpu_cores", + "expr": "kube_node_status_allocatable_cpu_cores{ cluster=\"$cluster\"}", "format": "time_series", "hide": true, "intervalFactor": 1, @@ -1738,7 +1756,7 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "expr": "sum(node_load1) by (instance)", + "expr": "sum(node_load1{ cluster=\"$cluster\"}) by (instance)", "format": "time_series", "hide": true, "intervalFactor": 1, @@ -1749,7 +1767,7 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "expr": "sum(node_load1)", + "expr": "sum(node_load1{ cluster=\"$cluster\"})", "format": "time_series", "hide": true, "intervalFactor": 1, @@ -1760,7 +1778,7 @@ "type":
"prometheus", "uid": "${DS_PROMETHEUS}" }, - "expr": "sum(\n irate(container_cpu_usage_seconds_total{beta_kubernetes_io_instance_type=\"m4.2xlarge\"}[$__rate_interval]))\nby (pod_name)", + "expr": "sum(\n irate(container_cpu_usage_seconds_total{beta_kubernetes_io_instance_type=\"m4.2xlarge\", cluster=\"$cluster\"}[$__rate_interval]))\nby (pod_name)", "format": "time_series", "hide": true, "intervalFactor": 1, @@ -1771,7 +1789,7 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "expr": "sum(\n irate(container_cpu_usage_seconds_total{}[$__rate_interval]))", + "expr": "sum(\n irate(container_cpu_usage_seconds_total{ cluster=\"$cluster\"}[$__rate_interval]))", "format": "time_series", "hide": true, "intervalFactor": 1, @@ -1782,7 +1800,7 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "expr": "rate(moja_cl_process_cpu_seconds_total{serviceName=~\"central.*\"}[$__rate_interval]) * 100", + "expr": "rate(moja_cl_process_cpu_seconds_total{serviceName=~\"central.*\", cluster=\"$cluster\"}[$__rate_interval]) * 100", "legendFormat": "{{kubernetes_pod_name}}", "refId": "E" } @@ -1827,6 +1845,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1840,6 +1859,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1862,8 +1882,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1908,7 +1927,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "avg(moja_cl_nodejs_eventloop_lag_seconds{serviceName=~\"central.*\"}) by (serviceName, instance)", + "expr": "avg(moja_cl_nodejs_eventloop_lag_seconds{serviceName=~\"central.*\", cluster=\"$cluster\"}) by (serviceName, instance)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{serviceName}} {{instance}}", @@ -1930,6 +1949,7 @@ "mode": "palette-classic" }, "custom": { 
+ "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1943,6 +1963,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1966,8 +1987,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2009,7 +2029,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "kube_deployment_spec_replicas{deployment=~\".*-centralledger-.*|central-service\"}", + "expr": "kube_deployment_spec_replicas{deployment=~\".*-centralledger-.*|central-service\", cluster=\"$cluster\"}", "format": "time_series", "hide": false, "intervalFactor": 1, @@ -2023,11 +2043,30 @@ } ], "refresh": "5s", - "schemaVersion": 38, - "style": "dark", + "schemaVersion": 39, "tags": [], "templating": { - "list": [] + "list": [ + { + "current": {}, + "definition": "label_values(node_uname_info,cluster)", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "cluster", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(node_uname_info,cluster)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] }, "time": { "from": "now-15m", @@ -2060,7 +2099,7 @@ }, "timezone": "", "title": "mojaloop-central-ledger", - "uid": "5z9mkZ-pr", + "uid": "5z9mkZ-pr-v001", "version": 1, "weekStart": "" -} +} \ No newline at end of file diff --git a/monitoring/dashboards/mojaloop/dashboard-performance-troubleshooting.json b/monitoring/dashboards/mojaloop/dashboard-performance-troubleshooting.json index dba95fa34..57e9de708 100644 --- a/monitoring/dashboards/mojaloop/dashboard-performance-troubleshooting.json +++ b/monitoring/dashboards/mojaloop/dashboard-performance-troubleshooting.json @@ -156,7 +156,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": 
"sum(irate(node_cpu_seconds_total{mode!=\"idle\"}[1m])) by (instance) /\nsum(irate(node_cpu_seconds_total[1m])) by (instance)\n* on(instance) group_left (nodename) node_uname_info{nodename=~\".+\"}", + "expr": "sum(irate(node_cpu_seconds_total{mode!=\"idle\", cluster=\"$cluster\"}[1m])) by (instance) /\nsum(irate(node_cpu_seconds_total{cluster=\"$cluster\"}[1m])) by (instance)\n* on(instance) group_left (nodename) node_uname_info{nodename=~\".+\", cluster=\"$cluster\"}", "instant": false, "legendFormat": "{{nodename}}", "range": true, @@ -253,7 +253,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "(\n 1- (node_memory_MemAvailable_bytes/node_memory_MemTotal_bytes)\n) * on(instance) group_left (nodename) node_uname_info{nodename=~\".+\"}", + "expr": "(\n 1- (node_memory_MemAvailable_bytes{cluster=\"$cluster\"}/node_memory_MemTotal_bytes{cluster=\"$cluster\"})\n) * on(instance) group_left (nodename) node_uname_info{nodename=~\".+\", cluster=\"$cluster\"}", "instant": false, "legendFormat": "{{nodename}}", "range": true, @@ -350,7 +350,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "(\n 1-(node_filesystem_avail_bytes{device=\"/dev/mapper/ubuntu--vg-ubuntu--lv\"}/node_filesystem_size_bytes{device=\"/dev/mapper/ubuntu--vg-ubuntu--lv\"})\n) * on(instance) group_left (nodename) node_uname_info{nodename=~\".+\"}", + "expr": "(\n 1-(node_filesystem_avail_bytes{device=\"/dev/mapper/ubuntu--vg-ubuntu--lv\", cluster=\"$cluster\"}/node_filesystem_size_bytes{device=\"/dev/mapper/ubuntu--vg-ubuntu--lv\", cluster=\"$cluster\"})\n) * on(instance) group_left (nodename) node_uname_info{nodename=~\".+\", cluster=\"$cluster\"}", "instant": false, "legendFormat": "{{nodename}}", "range": true, @@ -424,7 +424,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "max(node_uname_info{job=\"node-exporter\", nodename=~\".+(moja|kafka|sts).+\"}) by (nodename)", + "expr": "max(node_uname_info{job=\"node-exporter\", nodename=~\".+(moja|kafka|sts).+\", 
cluster=\"$cluster\"}) by (nodename)", "format": "table", "instant": true, "legendFormat": "auto", @@ -542,7 +542,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "(\n sum(rate(namedprocess_namegroup_cpu_seconds_total[1m])) by (groupname,nodename)\n / on (nodename) group_left\n sum(rate(node_cpu_seconds_total[1m])) by (nodename)\n) > 0.7", + "expr": "(\n sum(rate(namedprocess_namegroup_cpu_seconds_total{cluster=\"$cluster\"}[1m])) by (groupname,nodename)\n / on (nodename) group_left\n sum(rate(node_cpu_seconds_total{cluster=\"$cluster\"}[1m])) by (nodename)\n) > 0.7", "hide": false, "instant": false, "legendFormat": "{{nodename}}:{{groupname}}", @@ -642,7 +642,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "(\n namedprocess_namegroup_memory_bytes{memtype=\"resident\"} \n / on (nodename) group_left\n node_memory_MemTotal_bytes\n) > 0.7\n", + "expr": "(\n namedprocess_namegroup_memory_bytes{memtype=\"resident\", cluster=\"$cluster\"} \n / on (nodename) group_left\n node_memory_MemTotal_bytes{cluster=\"$cluster\"}\n) > 0.7\n", "instant": false, "legendFormat": "{{nodename}}:{{groupname}}", "range": true, @@ -754,7 +754,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum by (serviceName, container)({error=~\"Error: read ECONNRESET\"})", + "expr": "sum by (serviceName, container)({error=~\"Error: read ECONNRESET\", cluster=\"$cluster\"})", "instant": false, "legendFormat": "ECONNRESET - {{serviceName}} - {{container}}", "range": true, @@ -766,7 +766,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum by (serviceName, container)({error=~\"Error: socket hang up\"})", + "expr": "sum by (serviceName, container)({error=~\"Error: socket hang up\", cluster=\"$cluster\"})", "hide": false, "instant": false, "legendFormat": "Socker hang up - {{serviceName}} - {{container}}", @@ -824,8 +824,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ 
-862,7 +861,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum by (container)(increase(kube_pod_container_status_restarts_total{namespace=~\"mojaloop\",container=~\"account-lookup-service|account-lookup-service-admin|centralledger-handler-admin-transfer|centralledger-handler-timeout|centralledger-handler-transfer-fulfil|centralledger-handler-transfer-get|centralledger-handler-transfer-position|centralledger-service|kafka\"}[5m]))", + "expr": "sum by (container)(increase(kube_pod_container_status_restarts_total{namespace=~\"mojaloop\",container=~\"account-lookup-service|account-lookup-service-admin|centralledger-handler-admin-transfer|centralledger-handler-timeout|centralledger-handler-transfer-fulfil|centralledger-handler-transfer-get|centralledger-handler-transfer-position|centralledger-service|kafka\", cluster=\"$cluster\"}[5m]))", "hide": false, "instant": false, "legendFormat": "{{container}}", @@ -875,7 +874,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum by (container)(increase(kube_pod_container_status_restarts_total{namespace=~\"mojaloop\",container=~\"account-lookup-service-sidecar|account-lookup-service-admin-sidcar|centralledger-handler-admin-transfer-sidecar|centralledger-handler-timeout-sidecar|centralledger-handler-transfer-fulfil-sidecar|centralledger-handler-transfer-fulfil-sidecar|centralledger-handler-transfer-get-sidecar|centralledger-handler-transfer-position-sidecar|centralledger-service-sidecar\"}[5m]))", + "expr": "sum by (container)(increase(kube_pod_container_status_restarts_total{namespace=~\"mojaloop\",container=~\"account-lookup-service-sidecar|account-lookup-service-admin-sidcar|centralledger-handler-admin-transfer-sidecar|centralledger-handler-timeout-sidecar|centralledger-handler-transfer-fulfil-sidecar|centralledger-handler-transfer-fulfil-sidecar|centralledger-handler-transfer-get-sidecar|centralledger-handler-transfer-position-sidecar|centralledger-service-sidecar\", 
cluster=\"$cluster\"}[5m]))", "hide": true, "instant": false, "legendFormat": "__auto", @@ -888,7 +887,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum by (container)(increase(kube_pod_container_status_restarts_total{container=\"mysql\",namespace=\"mojaloop-db\"}[5m]))", + "expr": "sum by (container)(increase(kube_pod_container_status_restarts_total{container=\"mysql\",namespace=\"mojaloop-db\", cluster=\"$cluster\"}[5m]))", "hide": true, "instant": false, "legendFormat": "{{instance}}", @@ -946,8 +945,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -984,7 +982,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum by (container)(increase(kube_pod_container_status_restarts_total{namespace=~\"mojaloop\",container=~\"account-lookup-service-sidecar|account-lookup-service-admin-sidcar|centralledger-handler-admin-transfer-sidecar|centralledger-handler-timeout-sidecar|centralledger-handler-transfer-fulfil-sidecar|centralledger-handler-transfer-fulfil-sidecar|centralledger-handler-transfer-get-sidecar|centralledger-handler-transfer-position-sidecar|centralledger-service-sidecar\"}[5m]))", + "expr": "sum by (container)(increase(kube_pod_container_status_restarts_total{namespace=~\"mojaloop\",container=~\"account-lookup-service-sidecar|account-lookup-service-admin-sidcar|centralledger-handler-admin-transfer-sidecar|centralledger-handler-timeout-sidecar|centralledger-handler-transfer-fulfil-sidecar|centralledger-handler-transfer-fulfil-sidecar|centralledger-handler-transfer-get-sidecar|centralledger-handler-transfer-position-sidecar|centralledger-service-sidecar\", cluster=\"$cluster\"}[5m]))", "instant": false, "legendFormat": "__auto", "range": true, @@ -1041,8 +1039,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1079,7 +1076,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum by 
(pod)(increase(kube_pod_container_status_restarts_total{container=\"mysql\",namespace=\"mojaloop-db\"}[5m]))", + "expr": "sum by (pod)(increase(kube_pod_container_status_restarts_total{container=\"mysql\",namespace=\"mojaloop-db\", cluster=\"$cluster\"}[5m]))", "instant": false, "legendFormat": "__auto", "range": true, @@ -1136,8 +1133,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1174,7 +1170,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum by (pod)(increase(kube_pod_container_status_restarts_total{container=\"kafka\",namespace=\"mojaloop\"}[5m]))", + "expr": "sum by (pod)(increase(kube_pod_container_status_restarts_total{container=\"kafka\",namespace=\"mojaloop\", cluster=\"$cluster\"}[5m]))", "hide": false, "instant": false, "legendFormat": "__auto", @@ -1232,8 +1228,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1272,7 +1267,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "container_memory_usage_bytes{namespace=~\"mojaloop\",container=~\"account-lookup-service|account-lookup-service-admin|centralledger-handler-admin-transfer|centralledger-handler-timeout|centralledger-handler-transfer-fulfil|centralledger-handler-transfer-get|centralledger-handler-transfer-position|centralledger-service|handler-pos-batch|kafka\"}", + "expr": "container_memory_usage_bytes{namespace=~\"mojaloop\",container=~\"account-lookup-service|account-lookup-service-admin|centralledger-handler-admin-transfer|centralledger-handler-timeout|centralledger-handler-transfer-fulfil|centralledger-handler-transfer-get|centralledger-handler-transfer-position|centralledger-service|handler-pos-batch|kafka\", cluster=\"$cluster\"}", "instant": false, "legendFormat": "{{container}}-{{id}}", "range": true, @@ -1329,8 +1324,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": 
"red", @@ -1367,7 +1361,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "rate(container_cpu_usage_seconds_total{namespace=~\"mojaloop\",container=~\"account-lookup-service|account-lookup-service-admin|centralledger-handler-admin-transfer|centralledger-handler-timeout|centralledger-handler-transfer-fulfil|centralledger-handler-transfer-get|centralledger-handler-transfer-position|centralledger-service|kafka|handler-pos-batch\"}[5m])", + "expr": "rate(container_cpu_usage_seconds_total{namespace=~\"mojaloop\",container=~\"account-lookup-service|account-lookup-service-admin|centralledger-handler-admin-transfer|centralledger-handler-timeout|centralledger-handler-transfer-fulfil|centralledger-handler-transfer-get|centralledger-handler-transfer-position|centralledger-service|kafka|handler-pos-batch\", cluster=\"$cluster\"}[5m])", "hide": false, "instant": false, "legendFormat": "{{container}} - {{id}}", @@ -1425,8 +1419,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1465,7 +1458,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "container_memory_usage_bytes{namespace=~\"mojaloop\",container=~\"account-lookup-service-sidecar|account-lookup-service-admin-sidcar|centralledger-handler-admin-transfer-sidecar|centralledger-handler-timeout-sidecar|centralledger-handler-transfer-fulfil-sidecar|centralledger-handler-transfer-fulfil-sidecar|centralledger-handler-transfer-get-sidecar|centralledger-handler-transfer-position-sidecar|centralledger-service-sidecar\"}", + "expr": 
"container_memory_usage_bytes{namespace=~\"mojaloop\",container=~\"account-lookup-service-sidecar|account-lookup-service-admin-sidcar|centralledger-handler-admin-transfer-sidecar|centralledger-handler-timeout-sidecar|centralledger-handler-transfer-fulfil-sidecar|centralledger-handler-transfer-fulfil-sidecar|centralledger-handler-transfer-get-sidecar|centralledger-handler-transfer-position-sidecar|centralledger-service-sidecar\", cluster=\"$cluster\"}", "hide": false, "instant": false, "legendFormat": "{{container}}-{{id}}", @@ -1523,8 +1516,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1561,7 +1553,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "rate(container_cpu_usage_seconds_total{namespace=~\"mojaloop\",container=~\"account-lookup-service-sidecar|account-lookup-service-admin-sidcar|centralledger-handler-admin-transfer-sidecar|centralledger-handler-timeout-sidecar|centralledger-handler-transfer-fulfil-sidecar|centralledger-handler-transfer-fulfil-sidecar|centralledger-handler-transfer-get-sidecar|centralledger-handler-transfer-position-sidecar|centralledger-service-sidecar\"}[5m])", + "expr": "rate(container_cpu_usage_seconds_total{namespace=~\"mojaloop\",container=~\"account-lookup-service-sidecar|account-lookup-service-admin-sidcar|centralledger-handler-admin-transfer-sidecar|centralledger-handler-timeout-sidecar|centralledger-handler-transfer-fulfil-sidecar|centralledger-handler-transfer-fulfil-sidecar|centralledger-handler-transfer-get-sidecar|centralledger-handler-transfer-position-sidecar|centralledger-service-sidecar\", cluster=\"$cluster\"}[5m])", "instant": false, "legendFormat": "{{container}}-{{id}}", "range": true, @@ -1618,8 +1610,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1658,7 +1649,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": 
"container_memory_usage_bytes{namespace=~\"mojaloop-db\", container=\"mysql\"}", + "expr": "container_memory_usage_bytes{namespace=~\"mojaloop-db\", container=\"mysql\", cluster=\"$cluster\"}", "hide": false, "instant": false, "legendFormat": "{{pod}}", @@ -1716,8 +1707,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1754,7 +1744,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "rate(container_cpu_usage_seconds_total{namespace=~\"mojaloop-db\", container=\"mysql\"}[5m])", + "expr": "rate(container_cpu_usage_seconds_total{namespace=~\"mojaloop-db\", container=\"mysql\", cluster=\"$cluster\"}[5m])", "instant": false, "legendFormat": "{{pod}}", "range": true, @@ -1811,8 +1801,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1849,7 +1838,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum by (pod)(max_over_time(mysql_global_status_threads_connected{namespace=\"mojaloop-db\"}[5m]))", + "expr": "sum by (pod)(max_over_time(mysql_global_status_threads_connected{namespace=\"mojaloop-db\", cluster=\"$cluster\"}[5m]))", "hide": true, "instant": false, "legendFormat": "Connections {{pod}} last 5m", @@ -1862,7 +1851,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum by (pod)(mysql_global_status_max_used_connections{namespace=\"mojaloop-db\"})", + "expr": "sum by (pod)(mysql_global_status_max_used_connections{namespace=\"mojaloop-db\", cluster=\"$cluster\"})", "hide": false, "instant": false, "legendFormat": "Max Use Connections {{pod}}", @@ -1875,7 +1864,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum by (pod)(mysql_global_variables_max_connections{namespace=\"mojaloop-db\"})", + "expr": "sum by (pod)(mysql_global_variables_max_connections{namespace=\"mojaloop-db\", cluster=\"$cluster\"})", "hide": false, "instant": false, "legendFormat": "Max Connections 
{{pod}}", @@ -1908,8 +1897,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1948,7 +1936,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(kube_deployment_status_replicas_available{namespace=\"mojaloop\",deployment=~\".*(account-lookup-service|account-lookup-service-admin|centralledger-handler-admin-transfer|centralledger-handler-timeout|centralledger-handler-transfer-fulfil|centralledger-handler-transfer-get|centralledger-handler-transfer-position|centralledger-service).*\"}) by (deployment)", + "expr": "sum(kube_deployment_status_replicas_available{namespace=\"mojaloop\",deployment=~\".*(account-lookup-service|account-lookup-service-admin|centralledger-handler-admin-transfer|centralledger-handler-timeout|centralledger-handler-transfer-fulfil|centralledger-handler-transfer-get|centralledger-handler-transfer-position|centralledger-service).*\", cluster=\"$cluster\"}) by (deployment)", "format": "table", "instant": true, "legendFormat": "{{deployment}}", @@ -1994,8 +1982,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2034,7 +2021,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(kube_statefulset_status_replicas_available{namespace=\"mojaloop-db\", statefulset=~\".*(mysql).*\"}) by (statefulset)", + "expr": "sum(kube_statefulset_status_replicas_available{namespace=\"mojaloop-db\", statefulset=~\".*(mysql).*\", cluster=\"$cluster\"}) by (statefulset)", "format": "table", "instant": true, "legendFormat": "{{statefulset}}", @@ -2080,8 +2067,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -2120,7 +2106,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(kube_statefulset_status_replicas_available{namespace=\"mojaloop\", statefulset=~\".*(kafka).*\"}) by (statefulset)", + "expr": 
"sum(kube_statefulset_status_replicas_available{namespace=\"mojaloop\", statefulset=~\".*(kafka).*\", cluster=\"$cluster\"}) by (statefulset)", "format": "table", "instant": true, "legendFormat": "{{statefulset}}", @@ -2160,6 +2146,25 @@ "name": "Filters", "skipUrlSync": false, "type": "adhoc" + }, + { + "current": {}, + "definition": "label_values(node_uname_info,cluster)", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "cluster", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(node_uname_info,cluster)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" } ] }, @@ -2170,7 +2175,7 @@ "timepicker": {}, "timezone": "", "title": "Performance Troubleshooting", - "uid": "f1068daf-16a2-4e52-9a57-1a6620925845-02", + "uid": "f1068daf-16a2-4e52-9a57-1a662092584-v002", "version": 1, "weekStart": "" -} +} \ No newline at end of file diff --git a/monitoring/dashboards/mojaloop/dashboard-quoting-service.json b/monitoring/dashboards/mojaloop/dashboard-quoting-service.json index d29d1688b..6ac79f863 100644 --- a/monitoring/dashboards/mojaloop/dashboard-quoting-service.json +++ b/monitoring/dashboards/mojaloop/dashboard-quoting-service.json @@ -228,7 +228,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum(rate(moja_qs_database_get_cache_value_count[$__rate_interval])) by (queryName, hit) ", + "expr": "sum(rate(moja_qs_database_get_cache_value_count{cluster=\"$cluster\"}[$__rate_interval])) by (queryName, hit) ", "hide": false, "legendFormat": "{{queryName}}-{{hit}}", "range": true, @@ -320,7 +320,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(moja_qs_quotes_id_get_count)", + "expr": "sum(moja_qs_quotes_id_get_count{cluster=\"$cluster\"})", "format": "time_series", "hide": false, "instant": false, @@ -419,7 +419,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(moja_qs_quotes_id_put_count)", + 
"expr": "sum(moja_qs_quotes_id_put_count{cluster=\"$cluster\"})", "format": "time_series", "hide": false, "instant": false, @@ -517,7 +517,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum(moja_qs_quotes_id_put_error_count)", + "expr": "sum(moja_qs_quotes_id_put_error_count{cluster=\"$cluster\"})", "hide": false, "instant": false, "range": true, @@ -612,7 +612,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum(moja_qs_quotes_post_count)", + "expr": "sum(moja_qs_quotes_post_count{cluster=\"$cluster\"})", "hide": false, "instant": false, "range": true, @@ -745,7 +745,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "avg(rate(moja_qs_quotes_post_sum[$__rate_interval]) / rate(moja_qs_quotes_post_count[$__rate_interval]) >=0) by (success)", + "expr": "avg(rate(moja_qs_quotes_post_sum{cluster=\"$cluster\"}[$__rate_interval]) / rate(moja_qs_quotes_post_count{cluster=\"$cluster\"}[$__rate_interval]) >=0) by (success)", "instant": false, "legendFormat": "postQuotes success:{{success}}", "range": true, @@ -757,7 +757,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "avg(rate(moja_qs_quotes_id_put_sum[$__rate_interval]) / rate(moja_qs_quotes_id_put_count[$__rate_interval]) >=0) by (success)", + "expr": "avg(rate(moja_qs_quotes_id_put_sum{cluster=\"$cluster\"}[$__rate_interval]) / rate(moja_qs_quotes_id_put_count{cluster=\"$cluster\"}[$__rate_interval]) >=0) by (success)", "hide": false, "instant": false, "legendFormat": "putQuotesByID success:{{success}}", @@ -770,7 +770,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "avg(rate(moja_qs_quotes_id_get_sum[$__rate_interval]) / rate(moja_qs_quotes_id_get_count[$__rate_interval]) >=0) by (success)", + "expr": "avg(rate(moja_qs_quotes_id_get_sum{cluster=\"$cluster\"}[$__rate_interval]) / rate(moja_qs_quotes_id_get_count{cluster=\"$cluster\"}[$__rate_interval]) >=0) by (success)", "hide": false, "instant": false, "legendFormat": 
"getQuotesByID success:{{success}}", @@ -783,7 +783,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "avg(rate(moja_qs_quotes_id_put_error_sum[$__rate_interval]) / rate(moja_qs_quotes_id_put_error_count[$__rate_interval]) >=0) by (success)", + "expr": "avg(rate(moja_qs_quotes_id_put_error_sum{cluster=\"$cluster\"}[$__rate_interval]) / rate(moja_qs_quotes_id_put_error_count{cluster=\"$cluster\"}[$__rate_interval]) >=0) by (success)", "hide": false, "instant": false, "legendFormat": "putQuotesByIDError success:{{success}}", @@ -887,7 +887,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum(rate(moja_qs_quotes_id_put_error_count[$__rate_interval])) by (success)", + "expr": "sum(rate(moja_qs_quotes_id_put_error_count{cluster=\"$cluster\"}[$__rate_interval])) by (success)", "instant": false, "interval": "", "legendFormat": "putQuotesIDError success:{{success}}", @@ -900,7 +900,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum(rate(moja_qs_quotes_id_get_count[$__rate_interval])) by (success)", + "expr": "sum(rate(moja_qs_quotes_id_get_count{cluster=\"$cluster\"}[$__rate_interval])) by (success)", "hide": false, "instant": false, "legendFormat": "getQuotesByID success:{{success}}", @@ -913,7 +913,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum(rate(moja_qs_quotes_id_put_count[$__rate_interval])) by (success)", + "expr": "sum(rate(moja_qs_quotes_id_put_count{cluster=\"$cluster\"}[$__rate_interval])) by (success)", "hide": false, "instant": false, "legendFormat": "putQuotesByID success:{{success}}", @@ -926,7 +926,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum(rate(moja_qs_quotes_post_count[$__rate_interval])) by (success)", + "expr": "sum(rate(moja_qs_quotes_post_count{cluster=\"$cluster\"}[$__rate_interval])) by (success)", "hide": false, "instant": false, "legendFormat": "postQuotes success:{{success}}", @@ -1031,7 +1031,7 @@ }, "editorMode": "code", "exemplar": false, 
- "expr": "avg(rate(moja_qs_model_quote_sum[$__rate_interval]) / rate(moja_qs_model_quote_count[$__rate_interval]) >=0) by (success, queryName)", + "expr": "avg(rate(moja_qs_model_quote_sum{cluster=\"$cluster\"}[$__rate_interval]) / rate(moja_qs_model_quote_count{cluster=\"$cluster\"}[$__rate_interval]) >=0) by (success, queryName)", "hide": false, "instant": false, "legendFormat": "{{queryName}} success:{{success}}", @@ -1135,7 +1135,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum(rate(moja_qs_model_quote_count[$__rate_interval])) by (success, queryName)", + "expr": "sum(rate(moja_qs_model_quote_count{cluster=\"$cluster\"}[$__rate_interval])) by (success, queryName)", "instant": false, "interval": "", "legendFormat": "{{queryName}} success:{{success}}", @@ -1205,8 +1205,8 @@ "overrides": [] }, "gridPos": { - "h": 3, - "w": 4, + "h": 7, + "w": 5, "x": 0, "y": 57 }, @@ -1236,7 +1236,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum (irate (container_cpu_usage_seconds_total{pod_name=~'.*quoting-service.*'}[2m])) * 100", + "expr": "sum (irate (container_cpu_usage_seconds_total{pod_name=~'.*quoting-service.*', cluster=\"$cluster\"}[2m])) * 100", "format": "time_series", "hide": true, "instant": true, @@ -1249,7 +1249,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum(irate (container_cpu_usage_seconds_total{pod_name=~'.*quoting-service.*'}[2m]))", + "expr": "sum(irate (container_cpu_usage_seconds_total{pod_name=~'.*quoting-service.*', cluster=\"$cluster\"}[2m]))", "range": true, "refId": "B" } @@ -1263,7 +1263,7 @@ "h": 1, "w": 24, "x": 0, - "y": 60 + "y": 64 }, "id": 21, "panels": [], @@ -1335,7 +1335,7 @@ "h": 8, "w": 24, "x": 0, - "y": 61 + "y": 65 }, "id": 22, "options": { @@ -1365,7 +1365,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "moja_qs_nodejs_heap_size_total_bytes{serviceName=~\"quoting-service.*\"}", + "expr": 
"moja_qs_nodejs_heap_size_total_bytes{serviceName=~\"quoting-service.*\", cluster=\"$cluster\"}", "legendFormat": "heap_total-{{instance}} {{kubernetes_pod_name}}", "range": true, "refId": "A" @@ -1376,7 +1376,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "moja_qs_nodejs_heap_size_used_bytes{serviceName=~\"quoting-service.*\"}", + "expr": "moja_qs_nodejs_heap_size_used_bytes{serviceName=~\"quoting-service.*\", cluster=\"$cluster\"}", "legendFormat": "heap_used-{{instance}} {{kubernetes_pod_name}}", "range": true, "refId": "B" @@ -1387,7 +1387,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "moja_qs_nodejs_external_memory_bytes{serviceName=~\"quoting-service.*\"}", + "expr": "moja_qs_nodejs_external_memory_bytes{serviceName=~\"quoting-service.*\", cluster=\"$cluster\"}", "legendFormat": "ext_memory-{{instance}} {{kubernetes_pod_name}}", "range": true, "refId": "C" @@ -1462,7 +1462,7 @@ "h": 7, "w": 24, "x": 0, - "y": 69 + "y": 73 }, "id": 23, "links": [], @@ -1490,7 +1490,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum (rate (moja_qs_process_cpu_seconds_total{serviceName=~\"account-lookup.*\"}[2m])) by (kubernetes_pod_name)", + "expr": "sum (rate (moja_qs_process_cpu_seconds_total{serviceName=~\"account-lookup.*\", cluster=\"$cluster\"}[2m])) by (kubernetes_pod_name)", "format": "time_series", "hide": true, "intervalFactor": 1, @@ -1504,7 +1504,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum(\n rate(container_cpu_usage_seconds_total{pod_name=~'.*account-lookup.*'}[2m]))\nby (pod_name)", + "expr": "sum(\n rate(container_cpu_usage_seconds_total{pod_name=~'.*account-lookup.*', cluster=\"$cluster\"}[2m]))\nby (pod_name)", "format": "time_series", "hide": true, "intervalFactor": 1, @@ -1518,7 +1518,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum(rate(container_cpu_usage_seconds_total{pod_name=~'.*account-lookup.*'}[2m]))", + "expr": 
"sum(rate(container_cpu_usage_seconds_total{pod_name=~'.*account-lookup.*', cluster=\"$cluster\"}[2m]))", "format": "time_series", "hide": true, "intervalFactor": 1, @@ -1532,7 +1532,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "rate(moja_qs_process_cpu_seconds_total{serviceName=~\"account-lookup.*\"}[30s]) * 100", + "expr": "rate(moja_qs_process_cpu_seconds_total{serviceName=~\"account-lookup.*\", cluster=\"$cluster\"}[30s]) * 100", "legendFormat": "{{kubernetes_pod_name}}", "range": true, "refId": "B" @@ -1547,7 +1547,7 @@ "h": 1, "w": 24, "x": 0, - "y": 76 + "y": 80 }, "id": 18, "panels": [], @@ -1620,7 +1620,7 @@ "h": 4, "w": 24, "x": 0, - "y": 77 + "y": 81 }, "id": 19, "links": [], @@ -1646,7 +1646,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "kube_deployment_spec_replicas{deployment=~\".*quoting-service.*\"}", + "expr": "kube_deployment_spec_replicas{deployment=~\".*quoting-service.*\", cluster=\"$cluster\"}", "format": "time_series", "hide": false, "intervalFactor": 1, @@ -1723,7 +1723,7 @@ "h": 7, "w": 24, "x": 0, - "y": 81 + "y": 85 }, "id": 20, "links": [], @@ -1747,7 +1747,7 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "avg(moja_qs_nodejs_eventloop_lag_seconds{serviceName=~\"quoting-service.*\"}) by (app)", + "expr": "avg(moja_qs_nodejs_eventloop_lag_seconds{serviceName=~\"quoting-service.*\", cluster=\"$cluster\"}) by (app)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{app}}", @@ -1763,7 +1763,31 @@ "schemaVersion": 39, "tags": [], "templating": { - "list": [] + "list": [ + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "label_values(node_uname_info,cluster)", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "cluster", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(node_uname_info,cluster)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + 
"regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] }, "time": { "from": "now-30m", @@ -1772,7 +1796,7 @@ "timepicker": {}, "timezone": "", "title": "Mojaloop - Quoting Service", - "uid": "a4373cfa-5295-430d-9e53-a59c008ffa6d", + "uid": "a4373cfa-5295-430d-9e53-a59c008ffa6-v001", "version": 2, "weekStart": "" -} +} \ No newline at end of file diff --git a/monitoring/dashboards/mojaloop/dashboard-simulators.json b/monitoring/dashboards/mojaloop/dashboard-simulators.json index 91d7a0975..8a7a6169b 100644 --- a/monitoring/dashboards/mojaloop/dashboard-simulators.json +++ b/monitoring/dashboards/mojaloop/dashboard-simulators.json @@ -9,17 +9,18 @@ "pluginName": "Prometheus" } ], + "__elements": {}, "__requires": [ { "type": "grafana", "id": "grafana", "name": "Grafana", - "version": "6.4.2" + "version": "10.2.3" }, { "type": "panel", "id": "graph", - "name": "Graph", + "name": "Graph (old)", "version": "" }, { @@ -30,8 +31,8 @@ }, { "type": "panel", - "id": "singlestat", - "name": "Singlestat", + "id": "stat", + "name": "Stat", "version": "" } ], @@ -39,7 +40,10 @@ "list": [ { "builtIn": 1, - "datasource": "-- Grafana --", + "datasource": { + "type": "datasource", + "uid": "grafana" + }, "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", @@ -49,14 +53,18 @@ ] }, "editable": true, - "gnetId": null, + "fiscalYearStartMonth": 0, "graphTooltip": 0, "id": null, "links": [], + "liveNow": false, "panels": [ { "collapsed": false, - "datasource": "${DS_PROMETHEUS}", + "datasource": { + "type": "prometheus", + "uid": "645fe2f7-ce19-433c-b746-afca1936af31" + }, "gridPos": { "h": 1, "w": 24, @@ -65,6 +73,15 @@ }, "id": 4, "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "645fe2f7-ce19-433c-b746-afca1936af31" + }, + "refId": "A" + } + ], "title": "Processing", "type": "row" }, @@ -73,8 +90,17 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "${DS_PROMETHEUS}", + "datasource": { + "type": 
"prometheus", + "uid": "${DS_PROMETHEUS}" + }, "description": "Simulator - Processed per sec", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, "fill": 1, "fillGradient": 0, "gridPos": { @@ -83,6 +109,7 @@ "x": 0, "y": 1 }, + "hiddenSeries": false, "id": 2, "legend": { "alignAsTable": true, @@ -102,9 +129,10 @@ "links": [], "nullPointMode": "null as zero", "options": { - "dataLinks": [] + "alertThreshold": true }, "percentage": false, + "pluginVersion": "10.2.3", "pointradius": 5, "points": false, "renderer": "flot", @@ -114,7 +142,11 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(moja_sim_request_count[120s])) by (fsp, source, operation)", + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(rate(moja_sim_request_count{cluster=\"$cluster\"}[120s])) by (fsp, source, operation)", "format": "time_series", "hide": true, "intervalFactor": 1, @@ -122,7 +154,11 @@ "refId": "A" }, { - "expr": "sum(rate(moja_sim_request_count[120s])) by (kubernetes_pod_name)", + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(rate(moja_sim_request_count{cluster=\"$cluster\"}[120s])) by (kubernetes_pod_name)", "format": "time_series", "hide": false, "intervalFactor": 1, @@ -130,7 +166,11 @@ "refId": "E" }, { - "expr": "sum(rate(moja_sim_request_count[120s])) by (fsp, source, operation, kubernetes_pod_name)", + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(rate(moja_sim_request_count{cluster=\"$cluster\"}[120s])) by (fsp, source, operation, kubernetes_pod_name)", "format": "time_series", "hide": true, "intervalFactor": 1, @@ -138,7 +178,11 @@ "refId": "B" }, { - "expr": "sum(irate(moja_sim_request_count[120s])) by (app)", + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(irate(moja_sim_request_count{cluster=\"$cluster\"}[120s])) by (app)", "format": "time_series", "hide": false, 
"intervalFactor": 1, @@ -146,15 +190,17 @@ "refId": "C" }, { - "expr": "sum(rate(moja_sim_request_count[120s])) by (operation)", + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(rate(moja_sim_request_count{cluster=\"$cluster\"}[120s])) by (operation)", "legendFormat": "{{operation}}", "refId": "D" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Simulator - Processed per sec", "tooltip": { "shared": true, @@ -163,53 +209,64 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "${DS_PROMETHEUS}", + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "description": "postTransfer / sec for Payee", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "0" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] }, "gridPos": { "h": 3, @@ -218,81 +275,78 @@ "y": 1 }, "id": 6, - "interval": null, "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], "maxDataPoints": 
100, - "nullPointMode": "connected", - "nullText": null, - "options": {}, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto", + "wideLayout": true }, - "tableColumn": "", + "pluginVersion": "10.2.3", "targets": [ { - "expr": "sum(irate(moja_sim_request_count{operation='postTransfers'}[120s]))", + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(irate(moja_sim_request_count{operation='postTransfers',cluster=\"$cluster\"}[120s]))", "format": "time_series", "instant": true, "intervalFactor": 1, "refId": "A" } ], - "thresholds": "", "title": "postTransfer / sec for Payee", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "0", - "value": "null" - } - ], - "valueName": "current" + "type": "stat" }, { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "${DS_PROMETHEUS}", + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "description": "putTransfersById / sec for Payee", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "0" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + 
"value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] }, "gridPos": { "h": 3, @@ -301,44 +355,31 @@ "y": 4 }, "id": 12, - "interval": null, "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "options": {}, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "textMode": "auto", + "wideLayout": true }, - "tableColumn": "", + "pluginVersion": "10.2.3", "targets": [ { - "expr": "sum(irate(moja_sim_request_count{fsp=\"payee\", operation=\"putTransfersById\"}[120s]))", + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(irate(moja_sim_request_count{fsp=\"payee\", operation=\"putTransfersById\",cluster=\"$cluster\"}[120s]))", "format": "time_series", "hide": true, "instant": true, @@ -347,42 +388,56 @@ "refId": "A" }, { - "expr": "sum(irate(moja_sim_request_count{operation=\"putTransfersById\"}[120s]))", + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(irate(moja_sim_request_count{operation=\"putTransfersById\",cluster=\"$cluster\"}[120s]))", "hide": false, "refId": "B" } ], - "thresholds": "", "title": "putTransfersById / sec for Payee", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "0", - "value": "null" - } - ], - "valueName": "avg" + "type": "stat" }, { - "cacheTimeout": 
null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "#299c46", - "rgba(237, 129, 40, 0.89)", - "#d44a3a" - ], - "datasource": "${DS_PROMETHEUS}", + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "description": "messages / second", - "format": "none", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": false, - "thresholdLabels": false, - "thresholdMarkers": true + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "0" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] }, "gridPos": { "h": 3, @@ -391,70 +446,56 @@ "y": 7 }, "id": 18, - "interval": null, "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "options": {}, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "textMode": "auto", + "wideLayout": true }, - "tableColumn": "", + "pluginVersion": "10.2.3", "targets": [ { - "expr": "sum(irate(moja_sim_request_count[120s]))", + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(irate(moja_sim_request_count{cluster=\"$cluster\"}[120s]))", "format": "time_series", "instant": true, 
"intervalFactor": 1, "refId": "A" } ], - "thresholds": "", "title": "messages / second", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "0", - "value": "null" - } - ], - "valueName": "avg" + "type": "stat" }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, - "datasource": "${DS_PROMETHEUS}", + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "description": "Simulators - Processing Time", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, "fill": 1, "fillGradient": 0, "gridPos": { @@ -463,6 +504,7 @@ "x": 0, "y": 10 }, + "hiddenSeries": false, "id": 8, "legend": { "alignAsTable": true, @@ -474,8 +516,6 @@ "min": true, "rightSide": true, "show": true, - "sort": null, - "sortDesc": null, "total": false, "values": true }, @@ -484,9 +524,10 @@ "links": [], "nullPointMode": "null as zero", "options": { - "dataLinks": [] + "alertThreshold": true }, "percentage": false, + "pluginVersion": "10.2.3", "pointradius": 5, "points": false, "renderer": "flot", @@ -496,7 +537,11 @@ "steppedLine": false, "targets": [ { - "expr": "avg(irate(moja_sim_request_sum[120s]) / irate(moja_sim_request_count[120s])) by (fsp, operation, kubernetes_pod_name)", + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "avg(irate(moja_sim_request_sum{cluster=\"$cluster\"}[120s]) / irate(moja_sim_request_count{cluster=\"$cluster\"}[120s])) by (fsp, operation, kubernetes_pod_name)", "format": "time_series", "hide": false, "intervalFactor": 1, @@ -504,7 +549,11 @@ "refId": "A" }, { - "expr": "avg(irate(moja_sim_request_sum[120s]) / irate(moja_sim_request_count[120s])) by (kubernetes_pod_name)", + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "avg(irate(moja_sim_request_sum{cluster=\"$cluster\"}[120s]) / irate(moja_sim_request_count{cluster=\"$cluster\"}[120s])) by (kubernetes_pod_name)", "format": "time_series", "hide": 
false, "intervalFactor": 1, @@ -512,7 +561,11 @@ "refId": "B" }, { - "expr": "avg(irate(moja_sim_request_sum[120s]) / irate(moja_sim_request_count[120s])) by (fsp, operation)", + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "avg(irate(moja_sim_request_sum{cluster=\"$cluster\"}[120s]) / irate(moja_sim_request_count{cluster=\"$cluster\"}[120s])) by (fsp, operation)", "format": "time_series", "hide": false, "intervalFactor": 1, @@ -520,14 +573,22 @@ "refId": "C" }, { - "expr": "avg(irate(moja_sim_request_sum[120s]) / irate(moja_sim_request_count[120s])) by (app)", + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "avg(irate(moja_sim_request_sum{cluster=\"$cluster\"}[120s]) / irate(moja_sim_request_count{cluster=\"$cluster\"}[120s])) by (app)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{app}}", "refId": "D" }, { - "expr": "irate(moja_sim_request_sum[120s]) / irate(moja_sim_request_count[120s])", + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "irate(moja_sim_request_sum{cluster=\"$cluster\"}[120s]) / irate(moja_sim_request_count{cluster=\"$cluster\"}[120s])", "format": "time_series", "hide": true, "interval": "", @@ -537,9 +598,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Simulators - Processing Time", "tooltip": { "shared": true, @@ -548,38 +607,32 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "s", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { "collapsed": false, - "datasource": "${DS_PROMETHEUS}", + "datasource": { + "type": "prometheus", + "uid": "645fe2f7-ce19-433c-b746-afca1936af31" + }, 
"gridPos": { "h": 1, "w": 24, @@ -588,6 +641,15 @@ }, "id": 10, "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "645fe2f7-ce19-433c-b746-afca1936af31" + }, + "refId": "A" + } + ], "title": "Misc", "type": "row" }, @@ -596,8 +658,17 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "${DS_PROMETHEUS}", + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "description": "Simulators - Event Loop Lag in Seconds", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, "fill": 1, "fillGradient": 0, "gridPos": { @@ -606,6 +677,7 @@ "x": 0, "y": 20 }, + "hiddenSeries": false, "id": 16, "legend": { "avg": false, @@ -621,9 +693,10 @@ "links": [], "nullPointMode": "null", "options": { - "dataLinks": [] + "alertThreshold": true }, "percentage": false, + "pluginVersion": "10.2.3", "pointradius": 5, "points": false, "renderer": "flot", @@ -633,16 +706,18 @@ "steppedLine": false, "targets": [ { - "expr": "avg(moja_nodejs_eventloop_lag_seconds{serviceName=\"simulator\"})", + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "avg(moja_nodejs_eventloop_lag_seconds{serviceName=\"simulator\",cluster=\"$cluster\"})", "format": "time_series", "intervalFactor": 1, "refId": "A" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Simulators - Event Loop Lag in Seconds", "tooltip": { "shared": true, @@ -651,33 +726,24 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "s", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -685,8 +751,17 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "${DS_PROMETHEUS}", + 
"datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "decimals": 0, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, "fill": 1, "fillGradient": 0, "gridPos": { @@ -695,6 +770,7 @@ "x": 0, "y": 27 }, + "hiddenSeries": false, "id": 14, "legend": { "alignAsTable": true, @@ -714,9 +790,10 @@ "links": [], "nullPointMode": "null", "options": { - "dataLinks": [] + "alertThreshold": true }, "percentage": false, + "pluginVersion": "10.2.3", "pointradius": 5, "points": false, "renderer": "flot", @@ -726,7 +803,11 @@ "steppedLine": false, "targets": [ { - "expr": "kube_deployment_spec_replicas{deployment=~\".*simulator.*\"}", + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "kube_deployment_spec_replicas{deployment=~\".*simulator.*\",cluster=\"$cluster\"}", "format": "time_series", "instant": false, "intervalFactor": 1, @@ -735,9 +816,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Simulators - # Pods for each Components", "tooltip": { "shared": true, @@ -746,9 +825,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -756,36 +833,48 @@ { "decimals": 0, "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } } ], - "refresh": false, - "schemaVersion": 20, - "style": "dark", + "refresh": "", + "schemaVersion": 39, "tags": [], "templating": { - "list": [] + "list": [ + { + "current": {}, + "definition": "label_values(node_uname_info,cluster)", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "cluster", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(node_uname_info,cluster)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + 
"refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] }, "time": { - "from": "now-30m", + "from": "now-2d", "to": "now" }, "timepicker": { @@ -815,6 +904,7 @@ }, "timezone": "", "title": "Mojaloop - Simulators", - "uid": "-Sr0y8fip", - "version": 14 -} + "uid": "Sr0y8fip-v001", + "version": 1, + "weekStart": "" +} \ No newline at end of file From 5013ae6ab017364ad8518febfa1eff4f0844aab0 Mon Sep 17 00:00:00 2001 From: Muzammil Date: Fri, 5 Jul 2024 16:00:32 +0500 Subject: [PATCH 2/3] feat(#3994): update performance dashboard (#637) --- ...dashboard-performance-troubleshooting.json | 1616 +++++++++++++---- 1 file changed, 1287 insertions(+), 329 deletions(-) diff --git a/monitoring/dashboards/mojaloop/dashboard-performance-troubleshooting.json b/monitoring/dashboards/mojaloop/dashboard-performance-troubleshooting.json index 57e9de708..7d4730053 100644 --- a/monitoring/dashboards/mojaloop/dashboard-performance-troubleshooting.json +++ b/monitoring/dashboards/mojaloop/dashboard-performance-troubleshooting.json @@ -11,6 +11,12 @@ ], "__elements": {}, "__requires": [ + { + "type": "panel", + "id": "barchart", + "name": "Bar chart", + "version": "" + }, { "type": "grafana", "id": "grafana", @@ -653,27 +659,11 @@ "type": "timeseries" }, { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 11 - }, - "id": 11, - "panels": [], - "title": "Container Information", - "type": "row" - }, - { - "": { - "type": "prometheus", - "uid": "Prometheus" - }, "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "description": "Shows network interfaces on each node which are consuming more than 1 MiB/s bandwidth (download).", "fieldConfig": { "defaults": { "color": { @@ -724,26 +714,31 @@ "value": 80 } ] - } + }, + "unit": "binBps" }, "overrides": [] }, "gridPos": { - "h": 4, - "w": 24, + "h": 5, + "w": 12, "x": 0, - "y": 12 + "y": 11 }, - "id": 9, + "id": 30, "options": { "legend": { - "calcs": [], - 
"displayMode": "list", + "calcs": [ + "last" + ], + "displayMode": "table", "placement": "right", - "showLegend": true + "showLegend": true, + "sortBy": "Last", + "sortDesc": true }, "tooltip": { - "mode": "single", + "mode": "multi", "sort": "none" } }, @@ -754,27 +749,14 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum by (serviceName, container)({error=~\"Error: read ECONNRESET\", cluster=\"$cluster\"})", + "expr": "sum(rate(node_network_receive_bytes_total{device!~\"cali.*|lo\",cluster=\"$cluster\"}[1m])) by (nodename,instance,device) > 1024 * 1024", "instant": false, - "legendFormat": "ECONNRESET - {{serviceName}} - {{container}}", + "legendFormat": "{{nodename}}/{{device}}", "range": true, "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "sum by (serviceName, container)({error=~\"Error: socket hang up\", cluster=\"$cluster\"})", - "hide": false, - "instant": false, - "legendFormat": "Socker hang up - {{serviceName}} - {{container}}", - "range": true, - "refId": "B" } ], - "title": "ECONNRESET and Socket hang up ERRORs", + "title": "Network I/O - Download", "type": "timeseries" }, { @@ -782,6 +764,7 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "description": "Shows network interfaces on each node which are consuming more than 1 MiB/s bandwidth (upload)", "fieldConfig": { "defaults": { "color": { @@ -824,33 +807,39 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", "value": 80 } ] - } + }, + "unit": "binBps" }, "overrides": [] }, "gridPos": { "h": 5, - "w": 6, - "x": 0, - "y": 16 + "w": 12, + "x": 12, + "y": 11 }, - "id": 1, + "id": 31, "options": { "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "calcs": [ + "last" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Last", + "sortDesc": true }, 
"tooltip": { - "mode": "single", + "mode": "multi", "sort": "none" } }, @@ -861,41 +850,14 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum by (container)(increase(kube_pod_container_status_restarts_total{namespace=~\"mojaloop\",container=~\"account-lookup-service|account-lookup-service-admin|centralledger-handler-admin-transfer|centralledger-handler-timeout|centralledger-handler-transfer-fulfil|centralledger-handler-transfer-get|centralledger-handler-transfer-position|centralledger-service|kafka\", cluster=\"$cluster\"}[5m]))", - "hide": false, + "expr": "sum(rate(node_network_transmit_bytes_total{device!~\"cali.*|lo\",cluster=\"$cluster\"}[1m])) by (nodename,instance,device) > 1024 * 1024", "instant": false, - "legendFormat": "{{container}}", + "legendFormat": "{{nodename}}/{{device}}", "range": true, "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "sum by (container)(increase(kube_pod_container_status_restarts_total{namespace=~\"mojaloop\",container=~\"account-lookup-service-sidecar|account-lookup-service-admin-sidcar|centralledger-handler-admin-transfer-sidecar|centralledger-handler-timeout-sidecar|centralledger-handler-transfer-fulfil-sidecar|centralledger-handler-transfer-fulfil-sidecar|centralledger-handler-transfer-get-sidecar|centralledger-handler-transfer-position-sidecar|centralledger-service-sidecar\", cluster=\"$cluster\"}[5m]))", - "hide": true, - "instant": false, - "legendFormat": "__auto", - "range": true, - "refId": "B" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "sum by (container)(increase(kube_pod_container_status_restarts_total{container=\"mysql\",namespace=\"mojaloop-db\", cluster=\"$cluster\"}[5m]))", - "hide": true, - "instant": false, - "legendFormat": "{{instance}}", - "range": true, - "refId": "C" } ], - "title": "mojaloop container restarting in last 5m", + 
"title": "Network I/O - Upload", "type": "timeseries" }, { @@ -903,6 +865,7 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "description": "Shows disks on each node which are consuming more than 1 MiB/s disk read bandwidth ", "fieldConfig": { "defaults": { "color": { @@ -945,33 +908,39 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", "value": 80 } ] - } + }, + "unit": "binBps" }, "overrides": [] }, "gridPos": { "h": 5, - "w": 6, - "x": 6, + "w": 12, + "x": 0, "y": 16 }, - "id": 2, + "id": 32, "options": { "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "calcs": [ + "last" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Last", + "sortDesc": true }, "tooltip": { - "mode": "single", + "mode": "multi", "sort": "none" } }, @@ -982,14 +951,14 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum by (container)(increase(kube_pod_container_status_restarts_total{namespace=~\"mojaloop\",container=~\"account-lookup-service-sidecar|account-lookup-service-admin-sidcar|centralledger-handler-admin-transfer-sidecar|centralledger-handler-timeout-sidecar|centralledger-handler-transfer-fulfil-sidecar|centralledger-handler-transfer-fulfil-sidecar|centralledger-handler-transfer-get-sidecar|centralledger-handler-transfer-position-sidecar|centralledger-service-sidecar\", cluster=\"$cluster\"}[5m]))", + "expr": "sum(rate(node_disk_read_bytes_total{cluster=\"$cluster\"}[1m])) by (instance,nodename,device) > 1024 * 1024", "instant": false, - "legendFormat": "__auto", + "legendFormat": "{{nodename}}/{{device}}", "range": true, "refId": "A" } ], - "title": "Mojaloop sidecar restarting last 5 minutes", + "title": "Disk I/O - Read", "type": "timeseries" }, { @@ -997,6 +966,7 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "description": "Shows disks on each node which are consuming more than 1 MiB/s disk write 
bandwidth ", "fieldConfig": { "defaults": { "color": { @@ -1039,33 +1009,39 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", "value": 80 } ] - } + }, + "unit": "binBps" }, "overrides": [] }, "gridPos": { "h": 5, - "w": 6, + "w": 12, "x": 12, "y": 16 }, - "id": 3, + "id": 33, "options": { "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "calcs": [ + "last" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Last", + "sortDesc": true }, "tooltip": { - "mode": "single", + "mode": "multi", "sort": "none" } }, @@ -1076,17 +1052,34 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum by (pod)(increase(kube_pod_container_status_restarts_total{container=\"mysql\",namespace=\"mojaloop-db\", cluster=\"$cluster\"}[5m]))", + "expr": "sum(rate(node_disk_written_bytes_total{cluster=\"$cluster\"}[1m])) by (instance,nodename,device) > 1024 * 1024", "instant": false, - "legendFormat": "__auto", + "legendFormat": "{{nodename}}/{{device}}", "range": true, "refId": "A" } ], - "title": "Databases restarting last 5 minutes", + "title": "Disk I/O - Write", "type": "timeseries" }, { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 21 + }, + "id": 11, + "panels": [], + "title": "Container Information", + "type": "row" + }, + { + "": { + "type": "prometheus", + "uid": "Prometheus" + }, "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" @@ -1133,7 +1126,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1145,17 +1139,17 @@ "overrides": [] }, "gridPos": { - "h": 5, - "w": 6, - "x": 18, - "y": 16 + "h": 4, + "w": 24, + "x": 0, + "y": 22 }, - "id": 10, + "id": 9, "options": { "legend": { "calcs": [], "displayMode": "list", - "placement": "bottom", + "placement": "right", "showLegend": true }, "tooltip": { @@ -1170,15 +1164,27 
@@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum by (pod)(increase(kube_pod_container_status_restarts_total{container=\"kafka\",namespace=\"mojaloop\", cluster=\"$cluster\"}[5m]))", - "hide": false, + "expr": "sum by (serviceName, container)({error=~\"Error: read ECONNRESET\", cluster=\"$cluster\"})", "instant": false, - "legendFormat": "__auto", + "legendFormat": "ECONNRESET - {{serviceName}} - {{container}}", "range": true, "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by (serviceName, container)({error=~\"Error: socket hang up\", cluster=\"$cluster\"})", + "hide": false, + "instant": false, + "legendFormat": "Socket hang up - {{serviceName}} - {{container}}", + "range": true, + "refId": "B" } ], - "title": "Kafka restarting last 5 minutes", + "title": "ECONNRESET and Socket hang up ERRORs", "type": "timeseries" }, { @@ -1228,25 +1234,25 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", "value": 80 } ] - }, - "unit": "bytes" + } }, "overrides": [] }, "gridPos": { - "h": 7, - "w": 12, + "h": 5, + "w": 6, "x": 0, - "y": 21 + "y": 26 }, - "id": 4, + "id": 1, "options": { "legend": { "calcs": [], @@ -1266,15 +1272,41 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "exemplar": false, - "expr": "container_memory_usage_bytes{namespace=~\"mojaloop\",container=~\"account-lookup-service|account-lookup-service-admin|centralledger-handler-admin-transfer|centralledger-handler-timeout|centralledger-handler-transfer-fulfil|centralledger-handler-transfer-get|centralledger-handler-transfer-position|centralledger-service|handler-pos-batch|kafka\", cluster=\"$cluster\"}", + "expr": "sum by
(container)(increase(kube_pod_container_status_restarts_total{namespace=~\"mojaloop\",container=~\"account-lookup-service|account-lookup-service-admin|centralledger-handler-admin-transfer|centralledger-handler-timeout|centralledger-handler-transfer-fulfil|centralledger-handler-transfer-get|centralledger-handler-transfer-position|centralledger-service|kafka\", cluster=\"$cluster\"}[5m]))", + "hide": false, "instant": false, - "legendFormat": "{{container}}-{{id}}", + "legendFormat": "{{container}}", "range": true, "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by (container)(increase(kube_pod_container_status_restarts_total{namespace=~\"mojaloop\",container=~\"account-lookup-service-sidecar|account-lookup-service-admin-sidcar|centralledger-handler-admin-transfer-sidecar|centralledger-handler-timeout-sidecar|centralledger-handler-transfer-fulfil-sidecar|centralledger-handler-transfer-fulfil-sidecar|centralledger-handler-transfer-get-sidecar|centralledger-handler-transfer-position-sidecar|centralledger-service-sidecar\", cluster=\"$cluster\"}[5m]))", + "hide": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by (container)(increase(kube_pod_container_status_restarts_total{container=\"mysql\",namespace=\"mojaloop-db\", cluster=\"$cluster\"}[5m]))", + "hide": true, + "instant": false, + "legendFormat": "{{instance}}", + "range": true, + "refId": "C" } ], - "title": "Mojaloop containers memory usage", + "title": "mojaloop container restarting in last 5m", "type": "timeseries" }, { @@ -1324,7 +1356,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1336,12 +1369,12 @@ "overrides": [] }, "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 21 + "h": 5, + "w": 6, + 
"x": 6, + "y": 26 }, - "id": 5, + "id": 2, "options": { "legend": { "calcs": [], @@ -1361,15 +1394,14 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "rate(container_cpu_usage_seconds_total{namespace=~\"mojaloop\",container=~\"account-lookup-service|account-lookup-service-admin|centralledger-handler-admin-transfer|centralledger-handler-timeout|centralledger-handler-transfer-fulfil|centralledger-handler-transfer-get|centralledger-handler-transfer-position|centralledger-service|kafka|handler-pos-batch\", cluster=\"$cluster\"}[5m])", - "hide": false, + "expr": "sum by (container)(increase(kube_pod_container_status_restarts_total{namespace=~\"mojaloop\",container=~\"account-lookup-service-sidecar|account-lookup-service-admin-sidcar|centralledger-handler-admin-transfer-sidecar|centralledger-handler-timeout-sidecar|centralledger-handler-transfer-fulfil-sidecar|centralledger-handler-transfer-fulfil-sidecar|centralledger-handler-transfer-get-sidecar|centralledger-handler-transfer-position-sidecar|centralledger-service-sidecar\", cluster=\"$cluster\"}[5m]))", "instant": false, - "legendFormat": "{{container}} - {{id}}", + "legendFormat": "__auto", "range": true, "refId": "A" } ], - "title": "Mojaloop container cpu usage", + "title": "Mojaloop sidecar restarting last 5 minutes", "type": "timeseries" }, { @@ -1419,25 +1451,25 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", "value": 80 } ] - }, - "unit": "bytes" + } }, "overrides": [] }, "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 28 + "h": 5, + "w": 6, + "x": 12, + "y": 26 }, - "id": 6, + "id": 3, "options": { "legend": { "calcs": [], @@ -1457,16 +1489,14 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "exemplar": false, - "expr": 
"container_memory_usage_bytes{namespace=~\"mojaloop\",container=~\"account-lookup-service-sidecar|account-lookup-service-admin-sidcar|centralledger-handler-admin-transfer-sidecar|centralledger-handler-timeout-sidecar|centralledger-handler-transfer-fulfil-sidecar|centralledger-handler-transfer-fulfil-sidecar|centralledger-handler-transfer-get-sidecar|centralledger-handler-transfer-position-sidecar|centralledger-service-sidecar\", cluster=\"$cluster\"}", - "hide": false, + "expr": "sum by (pod)(increase(kube_pod_container_status_restarts_total{container=\"mysql\",namespace=\"mojaloop-db\", cluster=\"$cluster\"}[5m]))", "instant": false, - "legendFormat": "{{container}}-{{id}}", + "legendFormat": "__auto", "range": true, "refId": "A" } ], - "title": "Mojaloop sidecar containers memory usage", + "title": "Databases restarting last 5 minutes", "type": "timeseries" }, { @@ -1516,7 +1546,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1528,12 +1559,12 @@ "overrides": [] }, "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 28 + "h": 5, + "w": 6, + "x": 18, + "y": 26 }, - "id": 7, + "id": 10, "options": { "legend": { "calcs": [], @@ -1553,14 +1584,15 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "rate(container_cpu_usage_seconds_total{namespace=~\"mojaloop\",container=~\"account-lookup-service-sidecar|account-lookup-service-admin-sidcar|centralledger-handler-admin-transfer-sidecar|centralledger-handler-timeout-sidecar|centralledger-handler-transfer-fulfil-sidecar|centralledger-handler-transfer-fulfil-sidecar|centralledger-handler-transfer-get-sidecar|centralledger-handler-transfer-position-sidecar|centralledger-service-sidecar\", cluster=\"$cluster\"}[5m])", + "expr": "sum by (pod)(increase(kube_pod_container_status_restarts_total{container=\"kafka\",namespace=\"mojaloop\", cluster=\"$cluster\"}[5m]))", + "hide": false, "instant": false, - "legendFormat": "{{container}}-{{id}}", + 
"legendFormat": "__auto", "range": true, "refId": "A" } ], - "title": "Mojaloop sidecar container cpu usage", + "title": "Kafka restarting last 5 minutes", "type": "timeseries" }, { @@ -1610,7 +1642,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1626,9 +1659,9 @@ "h": 7, "w": 12, "x": 0, - "y": 35 + "y": 31 }, - "id": 20, + "id": 4, "options": { "legend": { "calcs": [], @@ -1649,15 +1682,14 @@ }, "editorMode": "code", "exemplar": false, - "expr": "container_memory_usage_bytes{namespace=~\"mojaloop-db\", container=\"mysql\", cluster=\"$cluster\"}", - "hide": false, + "expr": "container_memory_usage_bytes{namespace=~\"mojaloop\",container=~\"account-lookup-service|account-lookup-service-admin|centralledger-handler-admin-transfer|centralledger-handler-timeout|centralledger-handler-transfer-fulfil|centralledger-handler-transfer-get|centralledger-handler-transfer-position|centralledger-service|handler-pos-batch|kafka\", cluster=\"$cluster\"}", "instant": false, - "legendFormat": "{{pod}}", + "legendFormat": "{{container}}-{{id}}", "range": true, "refId": "A" } ], - "title": "Database containers memory usage", + "title": "Mojaloop containers memory usage", "type": "timeseries" }, { @@ -1707,7 +1739,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1722,9 +1755,9 @@ "h": 7, "w": 12, "x": 12, - "y": 35 + "y": 31 }, - "id": 21, + "id": 5, "options": { "legend": { "calcs": [], @@ -1744,14 +1777,15 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "rate(container_cpu_usage_seconds_total{namespace=~\"mojaloop-db\", container=\"mysql\", cluster=\"$cluster\"}[5m])", + "expr": 
"rate(container_cpu_usage_seconds_total{namespace=~\"mojaloop\",container=~\"account-lookup-service|account-lookup-service-admin|centralledger-handler-admin-transfer|centralledger-handler-timeout|centralledger-handler-transfer-fulfil|centralledger-handler-transfer-get|centralledger-handler-transfer-position|centralledger-service|kafka|handler-pos-batch\", cluster=\"$cluster\"}[5m])", + "hide": false, "instant": false, - "legendFormat": "{{pod}}", + "legendFormat": "{{container}} - {{id}}", "range": true, "refId": "A" } ], - "title": "Database container cpu usage", + "title": "Mojaloop container cpu usage", "type": "timeseries" }, { @@ -1759,6 +1793,7 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "description": "Shows mojaloop pods which are consuming more than 1 MiB/s network download bandwidth ", "fieldConfig": { "defaults": { "color": { @@ -1801,28 +1836,32 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", "value": 80 } ] - } + }, + "unit": "binBps" }, "overrides": [] }, "gridPos": { - "h": 4, - "w": 24, + "h": 7, + "w": 12, "x": 0, - "y": 42 + "y": 38 }, - "id": 8, + "id": 34, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "last" + ], + "displayMode": "table", "placement": "bottom", "showLegend": true }, @@ -1838,41 +1877,15 @@ "uid": "${DS_PROMETHEUS}" }, "editorMode": "code", - "expr": "sum by (pod)(max_over_time(mysql_global_status_threads_connected{namespace=\"mojaloop-db\", cluster=\"$cluster\"}[5m]))", - "hide": true, + "exemplar": false, + "expr": "rate(container_network_receive_bytes_total{namespace=~\"mojaloop\", pod=~\".*(account-lookup-service|account-lookup-service-admin|centralledger-handler-admin-transfer|centralledger-handler-timeout|centralledger-handler-transfer-fulfil|centralledger-handler-transfer-get|centralledger-handler-transfer-position|centralledger-service|handler-pos-batch|kafka).*\", cluster=\"$cluster\"}[5m]) > 1024 * 1024", 
"instant": false, - "legendFormat": "Connections {{pod}} last 5m", + "legendFormat": "{{pod}}", "range": true, "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "sum by (pod)(mysql_global_status_max_used_connections{namespace=\"mojaloop-db\", cluster=\"$cluster\"})", - "hide": false, - "instant": false, - "legendFormat": "Max Use Connections {{pod}}", - "range": true, - "refId": "B" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "editorMode": "code", - "expr": "sum by (pod)(mysql_global_variables_max_connections{namespace=\"mojaloop-db\", cluster=\"$cluster\"})", - "hide": false, - "instant": false, - "legendFormat": "Max Connections {{pod}}", - "range": true, - "refId": "C" } ], - "title": "MySQL Connections", + "title": "Mojaloop pods Network I/O - Download", "type": "timeseries" }, { @@ -1880,54 +1893,83 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "description": "Shows mojaloop pods which are consuming more than 1 MiB/s network upload bandwidth ", "fieldConfig": { "defaults": { "color": { - "mode": "thresholds" + "mode": "palette-classic" }, "custom": { - "align": "auto", - "cellOptions": { - "type": "auto" + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "inspect": false + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", 
"value": 80 } ] - } + }, + "unit": "binBps" }, "overrides": [] }, "gridPos": { - "h": 6, - "w": 6, - "x": 0, - "y": 46 + "h": 7, + "w": 12, + "x": 12, + "y": 38 }, - "id": 17, + "id": 35, "options": { - "cellHeight": "sm", - "footer": { - "countRows": false, - "fields": "", - "reducer": [ - "sum" + "legend": { + "calcs": [ + "last" ], - "show": true + "displayMode": "table", + "placement": "bottom", + "showLegend": true }, - "showHeader": true + "tooltip": { + "mode": "single", + "sort": "none" + } }, - "pluginVersion": "10.2.3", "targets": [ { "datasource": { @@ -1936,29 +1978,15 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(kube_deployment_status_replicas_available{namespace=\"mojaloop\",deployment=~\".*(account-lookup-service|account-lookup-service-admin|centralledger-handler-admin-transfer|centralledger-handler-timeout|centralledger-handler-transfer-fulfil|centralledger-handler-transfer-get|centralledger-handler-transfer-position|centralledger-service).*\", cluster=\"$cluster\"}) by (deployment)", - "format": "table", - "instant": true, - "legendFormat": "{{deployment}}", - "range": false, + "expr": "rate(container_network_transmit_bytes_total{namespace=~\"mojaloop\", pod=~\".*(account-lookup-service|account-lookup-service-admin|centralledger-handler-admin-transfer|centralledger-handler-timeout|centralledger-handler-transfer-fulfil|centralledger-handler-transfer-get|centralledger-handler-transfer-position|centralledger-service|handler-pos-batch|kafka).*\", cluster=\"$cluster\"}[5m]) > 1024 * 1024 ", + "instant": false, + "legendFormat": "{{pod}}", + "range": true, "refId": "A" } ], - "title": "Mojaloop replicaset count", - "transformations": [ - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true - }, - "includeByName": {}, - "indexByName": {}, - "renameByName": {} - } - } - ], - "type": "table" + "title": "Mojaloop pods Network I/O - Upload", + "type": "timeseries" }, { "datasource": { @@ -1968,51 +1996,882 @@ 
"fieldConfig": { "defaults": { "color": { - "mode": "thresholds" + "mode": "palette-classic" }, "custom": { - "align": "auto", - "cellOptions": { - "type": "auto" + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "inspect": false + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", "value": 80 } ] - } + }, + "unit": "bytes" }, "overrides": [] }, "gridPos": { - "h": 6, - "w": 6, - "x": 6, - "y": 46 + "h": 7, + "w": 12, + "x": 0, + "y": 45 }, - "id": 18, + "id": 6, "options": { - "cellHeight": "sm", - "footer": { - "countRows": false, - "fields": "", - "reducer": [ - "sum" - ], - "show": true + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "showHeader": true + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "container_memory_usage_bytes{namespace=~\"mojaloop\",container=~\"account-lookup-service-sidecar|account-lookup-service-admin-sidcar|centralledger-handler-admin-transfer-sidecar|centralledger-handler-timeout-sidecar|centralledger-handler-transfer-fulfil-sidecar|centralledger-handler-transfer-fulfil-sidecar|centralledger-handler-transfer-get-sidecar|centralledger-handler-transfer-position-sidecar|centralledger-service-sidecar\", 
cluster=\"$cluster\"}", + "hide": false, + "instant": false, + "legendFormat": "{{container}}-{{id}}", + "range": true, + "refId": "A" + } + ], + "title": "Mojaloop sidecar containers memory usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 45 + }, + "id": 7, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": 
"rate(container_cpu_usage_seconds_total{namespace=~\"mojaloop\",container=~\"account-lookup-service-sidecar|account-lookup-service-admin-sidcar|centralledger-handler-admin-transfer-sidecar|centralledger-handler-timeout-sidecar|centralledger-handler-transfer-fulfil-sidecar|centralledger-handler-transfer-fulfil-sidecar|centralledger-handler-transfer-get-sidecar|centralledger-handler-transfer-position-sidecar|centralledger-service-sidecar\", cluster=\"$cluster\"}[5m])", + "instant": false, + "legendFormat": "{{container}}-{{id}}", + "range": true, + "refId": "A" + } + ], + "title": "Mojaloop sidecar container cpu usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 52 + }, + "id": 20, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + 
"exemplar": false, + "expr": "container_memory_usage_bytes{namespace=~\"mojaloop-db\", container=\"mysql\", cluster=\"$cluster\"}", + "hide": false, + "instant": false, + "legendFormat": "{{pod}}", + "range": true, + "refId": "A" + } + ], + "title": "Database containers memory usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 52 + }, + "id": 21, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "rate(container_cpu_usage_seconds_total{namespace=~\"mojaloop-db\", container=\"mysql\", cluster=\"$cluster\"}[5m])", + "instant": false, + "legendFormat": "{{pod}}", + "range": true, + "refId": "A" + } + ], + "title": "Database container cpu usage", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + 
"y": 59 + }, + "id": 25, + "panels": [], + "title": "Mojaloop", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 60 + }, + "id": 17, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": true + }, + "showHeader": true + }, + "pluginVersion": "10.2.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_deployment_status_replicas_available{namespace=\"mojaloop\",deployment=~\".*(account-lookup-service|account-lookup-service-admin|centralledger-handler-admin-transfer|centralledger-handler-timeout|centralledger-handler-transfer-fulfil|centralledger-handler-transfer-get|centralledger-handler-transfer-position|centralledger-service).*\", cluster=\"$cluster\"}) by (deployment)", + "format": "table", + "instant": true, + "legendFormat": "{{deployment}}", + "range": false, + "refId": "A" + } + ], + "title": "Mojaloop replicaset count", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "includeByName": {}, + "indexByName": {}, + "renameByName": {} + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": 
"absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 60 + }, + "id": 18, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": true + }, + "showHeader": true + }, + "pluginVersion": "10.2.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_statefulset_status_replicas_available{namespace=\"mojaloop-db\", statefulset=~\".*(mysql).*\", cluster=\"$cluster\"}) by (statefulset)", + "format": "table", + "instant": true, + "legendFormat": "{{statefulset}}", + "range": false, + "refId": "A" + } + ], + "title": "MySQL statefulset count", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "includeByName": {}, + "indexByName": {}, + "renameByName": {} + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 12, + "y": 60 + }, + "id": 19, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": true + }, + "showHeader": true + }, + "pluginVersion": "10.2.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_statefulset_status_replicas_available{namespace=\"mojaloop\", 
statefulset=~\".*(kafka).*\", cluster=\"$cluster\"}) by (statefulset)", + "format": "table", + "instant": true, + "legendFormat": "{{statefulset}}", + "range": false, + "refId": "A" + } + ], + "title": "Kafka statefulset count", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "includeByName": {}, + "indexByName": {}, + "renameByName": {} + } + } + ], + "type": "table" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 66 + }, + "id": 24, + "panels": [], + "title": "My SQL", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 24, + "x": 0, + "y": 67 + }, + "id": 8, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by 
(pod)(max_over_time(mysql_global_status_threads_connected{namespace=\"mojaloop-db\", cluster=\"$cluster\"}[5m]))", + "hide": true, + "instant": false, + "legendFormat": "Connections {{pod}} last 5m", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by (pod)(mysql_global_status_max_used_connections{namespace=\"mojaloop-db\", cluster=\"$cluster\"})", + "hide": false, + "instant": false, + "legendFormat": "Max Use Connections {{pod}}", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "sum by (pod)(mysql_global_variables_max_connections{namespace=\"mojaloop-db\", cluster=\"$cluster\"})", + "hide": false, + "instant": false, + "legendFormat": "Max Connections {{pod}}", + "range": true, + "refId": "C" + } + ], + "title": "MySQL Connections", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 71 + }, + "id": 26, + "panels": [], + "title": "Kafka", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "Shows the no. 
of partitions for each topic", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "fillOpacity": 80, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineWidth": 1, + "scaleDistribution": { + "type": "linear" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 72 + }, + "id": 27, + "options": { + "barRadius": 0, + "barWidth": 0.97, + "fullHighlight": false, + "groupWidth": 0.7, + "legend": { + "calcs": [ + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "orientation": "auto", + "showValue": "auto", + "stacking": "none", + "tooltip": { + "mode": "single", + "sort": "none" + }, + "xTickLabelRotation": 0, + "xTickLabelSpacing": 0 }, - "pluginVersion": "10.2.3", "targets": [ { "datasource": { @@ -2021,53 +2880,55 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(kube_statefulset_status_replicas_available{namespace=\"mojaloop-db\", statefulset=~\".*(mysql).*\", cluster=\"$cluster\"}) by (statefulset)", - "format": "table", + "expr": "sum(kafka_topic_partitions{cluster=\"$cluster\"}) by (topic)", "instant": true, - "legendFormat": "{{statefulset}}", + "legendFormat": "__auto", "range": false, "refId": "A" } ], - "title": "MySQL statefulset count", - "transformations": [ - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true - }, - "includeByName": {}, - "indexByName": {}, - "renameByName": {} - } - } - ], - "type": "table" + "title": "Partition count w-r-t topic", + "type": "barchart" }, { "datasource": { "type": "prometheus", 
"uid": "${DS_PROMETHEUS}" }, + "description": "Shows the no. of consumers in each consumer group", "fieldConfig": { "defaults": { "color": { - "mode": "thresholds" + "mode": "palette-classic" }, "custom": { - "align": "auto", - "cellOptions": { - "type": "auto" + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "fillOpacity": 80, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "inspect": false + "lineWidth": 1, + "scaleDistribution": { + "type": "linear" + }, + "thresholdsStyle": { + "mode": "off" + } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -2079,25 +2940,35 @@ "overrides": [] }, "gridPos": { - "h": 6, - "w": 6, - "x": 12, - "y": 46 + "h": 7, + "w": 8, + "x": 8, + "y": 72 }, - "id": 19, + "id": 29, "options": { - "cellHeight": "sm", - "footer": { - "countRows": false, - "fields": "", - "reducer": [ - "sum" + "barRadius": 0, + "barWidth": 0.97, + "fullHighlight": false, + "groupWidth": 0.7, + "legend": { + "calcs": [ + "last" ], - "show": true + "displayMode": "table", + "placement": "bottom", + "showLegend": true }, - "showHeader": true + "orientation": "auto", + "showValue": "auto", + "stacking": "none", + "tooltip": { + "mode": "single", + "sort": "none" + }, + "xTickLabelRotation": 0, + "xTickLabelSpacing": 0 }, - "pluginVersion": "10.2.3", "targets": [ { "datasource": { @@ -2106,29 +2977,112 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(kube_statefulset_status_replicas_available{namespace=\"mojaloop\", statefulset=~\".*(kafka).*\", cluster=\"$cluster\"}) by (statefulset)", - "format": "table", + "expr": "sum(kafka_consumergroup_members{cluster=\"$cluster\"}) by (consumergroup)", "instant": true, - "legendFormat": "{{statefulset}}", + "legendFormat": "__auto", "range": false, "refId": "A" } ], - "title": "Kafka 
statefulset count", - "transformations": [ - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true + "title": "Consumers count w-r-t consumer group", + "type": "barchart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "Shows the maximum lag (in message count) faced by a consumer in given (consumergroup,topic). ", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "fillOpacity": 80, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "includeByName": {}, - "indexByName": {}, - "renameByName": {} + "lineWidth": 1, + "scaleDistribution": { + "type": "linear" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 72 + }, + "id": 28, + "options": { + "barRadius": 0, + "barWidth": 0.97, + "fullHighlight": false, + "groupWidth": 0.7, + "legend": { + "calcs": [ + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "orientation": "auto", + "showValue": "auto", + "stacking": "none", + "tooltip": { + "mode": "single", + "sort": "none" + }, + "xTickLabelRotation": 0, + "xTickLabelSpacing": 0 + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max(clamp_min(kafka_consumergroup_lag{cluster=\"$cluster\"},0)) by (consumergroup,topic)", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" } ], - "type": "table" + "title": "Max consumer group lag", + "type": "barchart" } ], "refresh": "", 
@@ -2149,6 +3103,10 @@ }, { "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "definition": "label_values(node_uname_info,cluster)", "hide": 0, "includeAll": false, @@ -2175,7 +3133,7 @@ "timepicker": {}, "timezone": "", "title": "Performance Troubleshooting", - "uid": "f1068daf-16a2-4e52-9a57-1a662092584-v002", + "uid": "f1068daf-16a2-4e52-9a57-1a662092584-v005", "version": 1, "weekStart": "" } \ No newline at end of file From 5df5666f1878dc64889778d168095e03280e6fa8 Mon Sep 17 00:00:00 2001 From: Muzammil Date: Tue, 9 Jul 2024 20:16:24 +0500 Subject: [PATCH 3/3] feat(#3996): added prometheus remote write dashboard (#639) --- .../prometheus-remote-write.json | 1662 +++++++++++++++++ 1 file changed, 1662 insertions(+) create mode 100644 monitoring/dashboards/infrastructure/prometheus-remote-write.json diff --git a/monitoring/dashboards/infrastructure/prometheus-remote-write.json b/monitoring/dashboards/infrastructure/prometheus-remote-write.json new file mode 100644 index 000000000..41137d5e4 --- /dev/null +++ b/monitoring/dashboards/infrastructure/prometheus-remote-write.json @@ -0,0 +1,1662 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__elements": {}, + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "10.2.3" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "text", + "name": "Text", + "version": "" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] 
+ }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [ + { + "asDropdown": false, + "icon": "external link", + "includeVars": false, + "keepTime": false, + "tags": [], + "targetBlank": false, + "title": "Monitoring remote write blog", + "tooltip": "", + "type": "link", + "url": "https://grafana.com/blog/2021/04/12/how-to-troubleshoot-remote-write-issues-in-prometheus/" + } + ], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 4, + "panels": [], + "title": "Headline", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 1, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "# Goal\n1. indicate problems with prometheus remote write\n2. performance metrics (request rate, latencies) related to remote write \n\n# Links\n- https://grafana.com/blog/2021/04/12/how-to-troubleshoot-remote-write-issues-in-prometheus/\n\n# Goal 1\nGoal 1 requires us to ensure \n1. remote write is not falling behind i.e. samples are not lost\n2. remote write should not have too many retries to the server\n3. remote write should not be operating at max capacity \n4. if remote write lost data, how much was it and when did it happen!\n\n\n# Goal 2\nshows general performance metrics\n1. request rate to remote server\n2. 
latency percentiles to remote server ", + "mode": "markdown" + }, + "pluginVersion": "10.2.3", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "", + "instant": false, + "range": true, + "refId": "A" + } + ], + "title": "Instructions", + "type": "text" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 5, + "panels": [], + "title": "Goal 1.1: Remote write should not fall behind", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "Shows the amount of time after which samples are pushed to remote server after reading from WAL. \n- Expected range should be 3-10 seconds\n- increasing delay time means remote write is falling behind and will lead to data loss", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "dashed+area" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 10 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 10 + }, + "id": 2, + "options": { + "legend": { + "calcs": [ + "last", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } 
+ }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "(\n prometheus_remote_storage_highest_timestamp_in_seconds{cluster=~\"$cluster\", instance=~\"$instance\"} \n- \n ignoring(remote_name, url) group_right(instance) (prometheus_remote_storage_queue_highest_sent_timestamp_seconds{cluster=~\"$cluster\", instance=~\"$instance\"} != 0)\n)", + "instant": false, + "legendFormat": "{{pod}}", + "range": true, + "refId": "A" + } + ], + "title": "Remote write sending delay", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "Shows the amount of time after which samples are being read by TSDB WAL watcher. The delay may increase in case of prometheus restarts (WAL replay). Considerable increase (20sec +) suggests an outage and possible data loss. \n\n- Expected delay should be less than 20 seconds\n- increasing delay time means remote write is falling behind and will lead to data loss", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "dashed+area" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 10 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 10 + }, 
+ "id": 3, + "options": { + "legend": { + "calcs": [ + "last", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "-(prometheus_wal_watcher_current_segment{cluster=~\"$cluster\", instance=~\"$instance\"}\n - ignoring(consumer) group_left\nprometheus_tsdb_wal_segment_current{cluster=~\"$cluster\", instance=~\"$instance\"})\n", + "instant": false, + "legendFormat": "{{consumer}}", + "range": true, + "refId": "A" + } + ], + "title": "WAL watcher delay", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 6, + "panels": [], + "title": "Goal 1.2: Remote write data rates", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "No. of samples/metadata sent per second. \nShould show steady activity. Reduced activity should be coupled with increase in error rate or less timeseries data being scraped. 
", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 10 + } + ] + }, + "unit": "samples/sec" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 19 + }, + "id": 7, + "options": { + "legend": { + "calcs": [ + "last", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "rate(prometheus_remote_storage_samples_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])", + "hide": false, + "instant": false, + "legendFormat": "{{remote_name}}", + "range": true, + "refId": "A" + } + ], + "title": "Request rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "No. of samples/metadata retried per second. \n\nIdeally zero. It indicates problems with the network or remote storage endpoint if there is a steady high rate for this metric. 
That may mean we need to reduce throughput of remote write to reduce load on the other end.\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "dashed+area" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 10 + } + ] + }, + "unit": "samples/sec" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 19 + }, + "id": 8, + "options": { + "legend": { + "calcs": [ + "last", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "rate(prometheus_remote_storage_samples_retried_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])", + "hide": false, + "instant": false, + "legendFormat": "{{remote_name}}", + "range": true, + "refId": "A" + } + ], + "title": "Retry rate", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 27 + }, + "id": 13, + "panels": [], + "title": "Goal 1.3: Remote write should not operate at max capacity", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "No. 
of shards currently being used to send data to remote write endpoints. This number should be less than max shards. A number near max shards means that we may be throttled in case of any data increase", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 28 + }, + "id": 14, + "options": { + "legend": { + "calcs": [ + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "prometheus_remote_storage_shards{cluster=~\"$cluster\", instance=~\"$instance\"}", + "instant": false, + "legendFormat": "{{remote_name}}", + "range": true, + "refId": "A" + } + ], + "title": "Remote write current shards", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "No. 
of shards estimated by algorithm to push data to remote endpoints ", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 28 + }, + "id": 16, + "options": { + "legend": { + "calcs": [ + "last", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "prometheus_remote_storage_shards_desired{cluster=~\"$cluster\", instance=~\"$instance\"}", + "instant": false, + "legendFormat": "{{remote_name}}", + "range": true, + "refId": "A" + } + ], + "title": "Remote write desired shards", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "Prometheus remote write min and max shards as per configuration", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + 
"barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 28 + }, + "id": 15, + "options": { + "legend": { + "calcs": [ + "last" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "prometheus_remote_storage_shards_min{cluster=~\"$cluster\", instance=~\"$instance\"}", + "instant": false, + "legendFormat": "{{remote_name}} - min shards", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "prometheus_remote_storage_shards_max{cluster=~\"$cluster\", instance=~\"$instance\"}", + "hide": false, + "instant": false, + "legendFormat": "{{remote_name}} - max shards", + "range": true, + "refId": "B" + } + ], + "title": "Min - Max Shards", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 36 + }, + "id": 17, + "panels": [], + "title": "Goal 1.4: Remote write data loss quantization", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, 
+ "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "samples/sec" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 0, + "y": 37 + }, + "id": 18, + "options": { + "legend": { + "calcs": [ + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "rate(prometheus_remote_storage_samples_dropped_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])", + "instant": false, + "legendFormat": "{{remote_name}}", + "range": true, + "refId": "A" + } + ], + "title": "Dropped samples rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + 
"lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "samples/sec" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 6, + "y": 37 + }, + "id": 19, + "options": { + "legend": { + "calcs": [ + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "rate(prometheus_remote_storage_samples_failed_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])", + "instant": false, + "legendFormat": "{{remote_name}}", + "range": true, + "refId": "A" + } + ], + "title": "Failed samples rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + 
"color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "samples/sec" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 12, + "y": 37 + }, + "id": 20, + "options": { + "legend": { + "calcs": [ + "last" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "rate(prometheus_remote_storage_samples_retried_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])", + "instant": false, + "legendFormat": "{{remote_name}}", + "range": true, + "refId": "A" + } + ], + "title": "Retried samples rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "Rate of enqueue operation failure because a shard’s queue was full", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "attempt/sec" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 18, + "y": 37 + }, + "id": 21, + "options": { + "legend": { + "calcs": [ + "last" + ], + 
"displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "rate(prometheus_remote_storage_enqueue_retries_total{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])", + "instant": false, + "legendFormat": "{{remote_name}}", + "range": true, + "refId": "A" + } + ], + "title": "Enqueue attempt rate", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 42 + }, + "id": 9, + "panels": [], + "title": "Remote write latencies", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "dashed+area" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 10 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 0, + "y": 43 + }, + "id": 10, + "options": { + "legend": { + "calcs": [ + "last", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + 
"datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum(rate(prometheus_remote_storage_sent_batch_duration_seconds_bucket{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])) by (le,remote_name))", + "instant": false, + "legendFormat": "{{remote_name}}", + "range": true, + "refId": "A" + } + ], + "title": "Remote write request latency - p99", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "dashed+area" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 10 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 8, + "y": 43 + }, + "id": 11, + "options": { + "legend": { + "calcs": [ + "last", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.90, sum(rate(prometheus_remote_storage_sent_batch_duration_seconds_bucket{cluster=~\"$cluster\", 
instance=~\"$instance\"}[5m])) by (le,remote_name))", + "instant": false, + "legendFormat": "{{remote_name}}", + "range": true, + "refId": "A" + } + ], + "title": "Remote write request latency - p90", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "dashed+area" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 10 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 16, + "y": 43 + }, + "id": 12, + "options": { + "legend": { + "calcs": [ + "last", + "mean" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum(rate(prometheus_remote_storage_sent_batch_duration_seconds_bucket{cluster=~\"$cluster\", instance=~\"$instance\"}[5m])) by (le,remote_name))", + "instant": false, + "legendFormat": "{{remote_name}}", + "range": true, + "refId": "A" + } + ], + "title": "Remote write request latency - p50", + "type": "timeseries" + } + ], + "refresh": "", + 
"schemaVersion": 39, + "tags": [], + "templating": { + "list": [ + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "label_values(prometheus_build_info,cluster)", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "cluster", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(prometheus_build_info,cluster)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "label_values(prometheus_build_info{cluster=\"$cluster\"},instance)", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "instance", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(prometheus_build_info{cluster=\"$cluster\"},instance)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Mojaloop - Prometheus Remote write", + "uid": "d2a65853-b3b1-47bf-b728-cc727b209e5b", + "version": 1, + "weekStart": "" + } + \ No newline at end of file