From 9697c0d41e2a353f2dda4c353b272e8be570a935 Mon Sep 17 00:00:00 2001 From: Yogesh Sharma Date: Fri, 7 Aug 2020 10:38:52 -0700 Subject: [PATCH] exporter queries, prometheus config file and dashboards for container deployments (#188) (#190) * exporter queries, prometheus config file and dashboards * same dashboards. But exported from Grafana GUI * fix file permissions * add links and fix units on network panel * rename the 4 golden signals dashboard and add disk info to the saturation panel * change metric names from ccp_container to ccp_nodemx, update dashboards with new names, remove invalid panel links * rename queries_containers.yml to queries_nodemx.yml * fix the description of process count metric * exported dashboards to use externally Co-authored-by: Pramodh Mereddy <36273111+pmereddy1@users.noreply.github.com> --- exporter/postgres/queries_nodemx.yml | 156 ++ grafana/containers/crud_details.json | 330 ++++ grafana/containers/pgbackrest.json | 334 ++++ grafana/containers/pod_details.json | 870 +++++++++ grafana/containers/postgresql_details.json | 1740 +++++++++++++++++ grafana/containers/postgresql_overview.json | 229 +++ .../postgresql_service_health_overview.json | 576 ++++++ grafana/containers/prometheus_alerts.json | 782 ++++++++ prometheus/crunchy-prometheus.yml.containers | 66 + 9 files changed, 5083 insertions(+) create mode 100644 exporter/postgres/queries_nodemx.yml create mode 100644 grafana/containers/crud_details.json create mode 100644 grafana/containers/pgbackrest.json create mode 100644 grafana/containers/pod_details.json create mode 100644 grafana/containers/postgresql_details.json create mode 100644 grafana/containers/postgresql_overview.json create mode 100644 grafana/containers/postgresql_service_health_overview.json create mode 100644 grafana/containers/prometheus_alerts.json create mode 100644 prometheus/crunchy-prometheus.yml.containers diff --git a/exporter/postgres/queries_nodemx.yml b/exporter/postgres/queries_nodemx.yml new file 
mode 100644 index 00000000..548dcec4 --- /dev/null +++ b/exporter/postgres/queries_nodemx.yml @@ -0,0 +1,156 @@ +### +# +# Begin File: pod_metrics.yml +# +### +ccp_nodemx_network: + query: "SELECT interface,tx_bytes,tx_packets, rx_bytes,rx_packets from monitor.proc_network_stats()" + metrics: + - interface: + usage: "LABEL" + - tx_bytes: + usage: "GAUGE" + description: "Number of bytes transmitted" + - tx_packets: + usage: "GAUGE" + description: "Number of packets transmitted" + - rx_bytes: + usage: "GAUGE" + description: "Number of bytes received" + - rx_packets: + usage: "GAUGE" + description: "Number of packets received" + +ccp_nodemx_process: + query: "SELECT monitor.cgroup_process_count() as count" + metrics: + - count: + usage: "GAUGE" + description: "Total number of database processes" + + +ccp_nodemx_mem: + query: "SELECT monitor.kdapi_scalar_bigint('mem_request') as request, monitor.kdapi_scalar_bigint('mem_limit') as limit, (select val from monitor.cgroup_setof_kv('memory.stat') where key='cache') as cache, (select val from monitor.cgroup_setof_kv('memory.stat') where key='rss') as rss, (select val from monitor.cgroup_setof_kv('memory.stat') where key='shmem') as shmem, (select val from monitor.cgroup_setof_kv('memory.stat') where key='mapped_file') as mapped_file, (select val from monitor.cgroup_setof_kv('memory.stat') where key='dirty') as dirty, (select val from monitor.cgroup_setof_kv('memory.stat') where key='active_anon') as active_anon, (select val from monitor.cgroup_setof_kv('memory.stat') where key='inactive_anon') as inactive_anon, (select val from monitor.cgroup_setof_kv('memory.stat') where key='active_file') as active_file, (select val from monitor.cgroup_setof_kv('memory.stat') where key='inactive_file') as inactive_file" + metrics: + - request: + usage: "GAUGE" + description: "Memory request value in bytes" + - limit: + usage: "GAUGE" + description: "Memory limit value in bytes" + - cache: + usage: "GAUGE" + description: "Total bytes of 
page cache memory" + - rss: + usage: "GAUGE" + description: "Total bytes of anonymous and swap cache memory" + - shmem: + usage: "GAUGE" + description: "Total bytes of shared memory" + - mapped_file: + usage: "GAUGE" + description: "Total bytes of mapped file (includes tmpfs/shmem)" + - mapped_file: + usage: "GAUGE" + description: "Total bytes of mapped file (includes tmpfs/shmem)" + - dirty: + usage: "GAUGE" + description: "Total bytes that are waiting to get written back to the disk" + - active_anon: + usage: "GAUGE" + description: "Total bytes of anonymous and swap cache memory on active LRU list" + - inactive_anon: + usage: "GAUGE" + description: "Total bytes of anonymous and swap cache memory on inactive LRU list" + - active_file: + usage: "GAUGE" + description: "Total bytes of file-backed memory on active LRU list" + - inactive_file: + usage: "GAUGE" + description: "Total bytes of file-backed memory on inactive LRU list" + + +ccp_nodemx_cpu: + query: "SELECT monitor.kdapi_scalar_bigint('cpu_request') as request, monitor.kdapi_scalar_bigint('cpu_limit') as limit" + metrics: + - request: + usage: "GAUGE" + description: "CPU request value in milli cores" + - limit: + usage: "GAUGE" + description: "CPU limit value in milli cores" + +ccp_nodemx_cpucfs: + query: "SELECT monitor.cgroup_scalar_bigint('cpu.cfs_period_us') as period_us, monitor.cgroup_scalar_bigint('cpu.cfs_quota_us') as quota_us" + metrics: + - period_us: + usage: "GAUGE" + description: "the length of a period (in microseconds)" + - quota_us: + usage: "GAUGE" + description: "the total available run-time within a period (in microseconds)" + +ccp_nodemx_cpuacct: + query: "SELECT monitor.cgroup_scalar_bigint('cpuacct.usage') as usage" + metrics: + - usage: + usage: "GAUGE" + description: "CPU usage in nanoseconds" + +ccp_nodemx_cpustat: + query: "select (SELECT val as nr_periods FROM monitor.cgroup_setof_kv('cpu.stat') where key='nr_periods'), (SELECT val as nr_throttled FROM 
monitor.cgroup_setof_kv('cpu.stat') where key='nr_throttled'), (SELECT val as throttled_time FROM monitor.cgroup_setof_kv('cpu.stat') where key='throttled_time')" + metrics: + - nr_periods: + usage: "GAUGE" + description: "number of periods that any thread was runnable" + - nr_throttled: + usage: "GAUGE" + description: "number of runnable periods in which the application used its entire quota and was throttled" + - throttled_time: + usage: "GAUGE" + description: "sum total amount of time individual threads within the cgroup were throttled" + + +ccp_nodemx_data_disk: + query: "SELECT mount_point, fs_type,reads_completed_successfully as reads, sectors_read, writes_completed as writes,sectors_written, total_bytes,available_bytes,total_file_nodes,free_file_nodes + FROM monitor.proc_mountinfo() m + JOIN monitor.proc_diskstats() d USING (major_number, minor_number) + JOIN monitor.fsinfo(m.mount_point) f USING (major_number, minor_number) + WHERE m.mount_point like '/pg%' or m.mount_point like '/tablespace%'" + metrics: + - mount_point: + usage: "LABEL" + description: "mount point" + - fs_type: + usage: "LABEL" + description: "File system type" + - reads: + usage: "GAUGE" + description: "Total reads" + - sectors_read: + usage: "GAUGE" + description: "Total sectors read" + - writes: + usage: "GAUGE" + description: "Total writes" + - sectors_written: + usage: "GAUGE" + description: "Total sectors written" + - total_bytes: + usage: "GAUGE" + description: "Size in bytes" + - available_bytes: + usage: "GAUGE" + description: "Available size in bytes" + - total_file_nodes: + usage: "GAUGE" + description: "Total file nodes" + - free_file_nodes: + usage: "GAUGE" + description: "Available file nodes" + +### +# +# End File: pod_metrics.yml +# +### diff --git a/grafana/containers/crud_details.json b/grafana/containers/crud_details.json new file mode 100644 index 00000000..a6818690 --- /dev/null +++ b/grafana/containers/crud_details.json @@ -0,0 +1,330 @@ +{ + "__inputs": [ + { 
+ "name": "DS_PROMETHEUS", + "label": "PROMETHEUS", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "6.7.4" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + } + ], + "annotations": { + "list": [ + { + "$$hashKey": "object:111", + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "iteration": 1596817489973, + "links": [ + { + "icon": "external link", + "includeVars": true, + "tags": [], + "type": "dashboards" + } + ], + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 12, + "w": 24, + "x": 0, + "y": 0 + }, + "height": "480", + "hiddenSeries": false, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "maxPerRow": 2, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(ccp_stat_user_tables_n_tup_ins{pg_cluster=\"[[cluster]]\", instance=~\"[[pod]]\", dbname =~ \"[[dbname]]\", schemaname =~ \"[[schemaname]]\", relname=~\"[[tablename]]\"}[60s]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "inserts - 
[[dbname]].[[schemaname]].[[tablename]]", + "refId": "A", + "step": 60 + }, + { + "expr": "sum(rate(ccp_stat_user_tables_n_tup_upd{pg_cluster=\"[[cluster]]\", instance=~\"[[pod]]\", dbname =~ \"[[dbname]]\", schemaname =~ \"[[schemaname]]\", relname=~\"[[tablename]]\"}[60s]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Updates - [[dbname]].[[schemaname]].[[tablename]]", + "refId": "B", + "step": 60 + }, + { + "expr": "sum(rate(ccp_stat_user_tables_n_tup_del{pg_cluster=\"[[cluster]]\", instance=~\"[[pod]]\", dbname =~ \"[[dbname]]\", schemaname =~ \"[[schemaname]]\", relname=~\"[[tablename]]\"}[60s]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Deletes - [[dbname]].[[schemaname]].[[tablename]]", + "refId": "C", + "step": 60 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CRUD", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": "30s", + "schemaVersion": 22, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "", + "hide": 0, + "includeAll": false, + "index": -1, + "label": null, + "multi": false, + "name": "cluster", + "options": [], + "query": "label_values(pg_cluster)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".*", + "current": {}, + "datasource": 
"${DS_PROMETHEUS}", + "definition": "label_values({pg_cluster=\"[[cluster]]\"},pod)", + "hide": 0, + "includeAll": true, + "index": -1, + "label": "pod", + "multi": true, + "name": "pod", + "options": [], + "query": "label_values({pg_cluster=\"[[cluster]]\"},pod)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".*", + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(ccp_database_size_bytes{pg_cluster=\"[[cluster]]\", instance=\"[[pod]]\"},dbname)", + "hide": 0, + "includeAll": true, + "index": -1, + "label": "dbname", + "multi": true, + "name": "dbname", + "options": [], + "query": "label_values(ccp_database_size_bytes{pg_cluster=\"[[cluster]]\", instance=\"[[pod]]\"},dbname)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".*", + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "", + "hide": 0, + "includeAll": true, + "index": -1, + "label": "schemaname", + "multi": true, + "name": "schemaname", + "options": [], + "query": "label_values(ccp_stat_user_tables_n_tup_ins{pg_cluster=\"[[cluster]]\",dbname=\"[[dbname]]\"},schemaname)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".*", + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "", + "hide": 0, + "includeAll": true, + "index": -1, + "label": null, + "multi": true, + "name": "tablename", + "options": [], + "query": "label_values(ccp_stat_user_tables_n_tup_ins{pg_cluster=\"[[cluster]]\",instance=\"[[pod]]\",dbname=\"[[dbname]]\",schemaname=\"[[schemaname]]\"},relname)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 
1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-5m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "CRUD_Details", + "uid": "cruddetails", + "variables": { + "list": [] + }, + "version": 2 +} \ No newline at end of file diff --git a/grafana/containers/pgbackrest.json b/grafana/containers/pgbackrest.json new file mode 100644 index 00000000..802d6750 --- /dev/null +++ b/grafana/containers/pgbackrest.json @@ -0,0 +1,334 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "PROMETHEUS", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "6.7.4" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + } + ], + "annotations": { + "list": [ + { + "$$hashKey": "object:183", + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "iteration": 1596817505521, + "links": [ + { + "asDropdown": false, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [], + "targetBlank": true, + "title": "", + "type": "dashboards" + } + ], + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, 
+ "y": 0 + }, + "hiddenSeries": false, + "id": 2, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "ccp_backrest_last_full_backup_time_since_completion_seconds{pg_cluster=\"[[cluster]]\", role=\"master\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Full", + "refId": "A" + }, + { + "expr": "ccp_backrest_last_diff_backup_time_since_completion_seconds{pg_cluster=\"[[cluster]]\", role=\"master\"}", + "legendFormat": "Diff", + "refId": "B" + }, + { + "expr": "ccp_backrest_last_incr_backup_time_since_completion_seconds{pg_cluster=\"[[cluster]]\", role=\"master\"}", + "legendFormat": "Incr", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Time Since Last Completed Backup", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 2, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "hiddenSeries": false, + "id": 4, + "legend": { + "alignAsTable": 
true, + "avg": false, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "ccp_backrest_last_runtime_backup_runtime_seconds{pg_cluster=\"[[cluster]]\", role=\"master\",backup_type=\"full\"}", + "format": "time_series", + "instant": false, + "intervalFactor": 1, + "legendFormat": "Full", + "refId": "A" + }, + { + "expr": "ccp_backrest_last_runtime_backup_runtime_seconds{pg_cluster=\"[[cluster]]\", role=\"master\",backup_type=\"diff\"}", + "legendFormat": "Diff", + "refId": "B" + }, + { + "expr": "ccp_backrest_last_runtime_backup_runtime_seconds{pg_cluster=\"[[cluster]]\",role=\"master\", backup_type=\"incr\"}", + "legendFormat": "Incr", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Backup Runtimes", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 2, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 2, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": "15m", + "schemaVersion": 22, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(pg_cluster)", + "hide": 0, + "includeAll": false, + "index": -1, + 
"label": "cluster", + "multi": false, + "name": "cluster", + "options": [], + "query": "label_values(pg_cluster)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "pgBackRest", + "uid": "pgbackrest", + "variables": { + "list": [] + }, + "version": 1 +} \ No newline at end of file diff --git a/grafana/containers/pod_details.json b/grafana/containers/pod_details.json new file mode 100644 index 00000000..48f068b6 --- /dev/null +++ b/grafana/containers/pod_details.json @@ -0,0 +1,870 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "PROMETHEUS", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "6.7.4" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + } + ], + "annotations": { + "list": [ + { + "$$hashKey": "object:43", + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "iteration": 1596817522512, + "links": [ + { + "$$hashKey": "object:200", + "icon": "external link", + "tags": [], + "type": "dashboards" + } + ], + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + 
"fillGradient": 0, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 0 + }, + "hiddenSeries": false, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(ccp_nodemx_data_disk_total_bytes{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\"}-ccp_nodemx_data_disk_available_bytes{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\"})*100/ccp_nodemx_data_disk_total_bytes{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{mount_point}} ", + "refId": "A" + }, + { + "expr": "(ccp_nodemx_data_disk_total_file_nodes{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\"}-ccp_nodemx_data_disk_free_file_nodes{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\"})*100/ccp_nodemx_data_disk_total_file_nodes{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{mount_point}} - Inodes", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Disk Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:193", + "format": "percent", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "$$hashKey": "object:194", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + 
"alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 0 + }, + "hiddenSeries": false, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(ccp_nodemx_data_disk_sectors_read{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\"}[1m])*512", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{mount_point}} - Reads", + "refId": "A" + }, + { + "expr": "rate(ccp_nodemx_data_disk_sectors_written{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\"}[1m])*512", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{mount_point}} - Writes ", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Disk Activity", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Inactive anon": "super-light-purple", + "Limit": "red", + "Request": "green" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": 
"${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 6 + }, + "hiddenSeries": false, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "ccp_nodemx_mem_limit{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Limit", + "refId": "A" + }, + { + "expr": "ccp_nodemx_mem_request{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Request", + "refId": "B" + }, + { + "expr": "ccp_nodemx_mem_cache{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Cached", + "refId": "C" + }, + { + "expr": "ccp_nodemx_mem_dirty{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Dirty", + "refId": "D" + }, + { + "expr": "ccp_nodemx_mem_shmem{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "shared mem", + "refId": "E" + }, + { + "expr": "ccp_nodemx_mem_rss{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "RSS", + "refId": "F" + }, + { + "expr": "ccp_nodemx_mem_mapped_file{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Mapped file", + "refId": "G" + 
}, + { + "expr": "ccp_nodemx_mem_active_anon{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Active anon", + "refId": "H" + }, + { + "expr": "ccp_nodemx_mem_inactive_anon{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Inactive anon", + "refId": "I" + }, + { + "expr": "ccp_nodemx_mem_active_file{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Active file", + "refId": "J" + }, + { + "expr": "ccp_nodemx_mem_inactive_file{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Inactive file", + "refId": "K" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "% Throttled": "dark-orange", + "% Used": "blue" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 6 + }, + "hiddenSeries": false, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + 
"dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Process count", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(((rate(ccp_nodemx_cpuacct_usage{pg_cluster=\"[[cluster]]\",pod=\"[[pod]]\"}[1m]))/1000)*100)/((ccp_nodemx_cpucfs_quota_us{pg_cluster=\"[[cluster]]\",pod=\"[[pod]]\"}/ccp_nodemx_cpucfs_period_us{pg_cluster=\"[[cluster]]\",pod=\"[[pod]]\"})*1000*1000)", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "% Used", + "refId": "A" + }, + { + "expr": "rate(ccp_nodemx_cpustat_nr_throttled{pg_cluster=\"[[cluster]]\",pod=\"[[pod]]\"}[1m])*100/rate(ccp_nodemx_cpustat_nr_periods{pg_cluster=\"[[cluster]]\",pod=\"[[pod]]\"}[1m])", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "% Throttled", + "refId": "B" + }, + { + "expr": "ccp_nodemx_process_count{pg_cluster=\"[[cluster]]\",pod=\"[[pod]]\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Process count", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU Stats", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": "110", + "min": "0", + "show": true + }, + { + "format": "short", + "label": "Process count", + "logBase": 1, + "max": null, + "min": "0", + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { 
+ "h": 6, + "w": 12, + "x": 0, + "y": 12 + }, + "hiddenSeries": false, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(ccp_nodemx_network_rx_bytes{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\", interface!=\"tunl0\"}[1m])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{interface}} - rx bytes", + "refId": "A" + }, + { + "expr": "rate(ccp_nodemx_network_tx_bytes{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\", interface!=\"tunl0\"}[1m])", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{interface}} - tx bytes", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Network Traffic", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:121", + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "$$hashKey": "object:122", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "CPU limit": "red", + "CPU request": "blue", + "Memory limit": "dark-red", + "Memory request": "dark-green" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 
12, + "x": 12, + "y": 12 + }, + "hiddenSeries": false, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "CPU request", + "yaxis": 2 + }, + { + "alias": "CPU limit", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "ccp_nodemx_cpu_limit{pg_cluster=\"[[cluster]]\",pod=\"[[pod]]\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "CPU limit", + "refId": "A" + }, + { + "expr": "ccp_nodemx_cpu_request{pg_cluster=\"[[cluster]]\",pod=\"[[pod]]\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "CPU request", + "refId": "B" + }, + { + "expr": "ccp_nodemx_mem_limit{pg_cluster=\"[[cluster]]\",pod=\"[[pod]]\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Memory limit", + "refId": "C" + }, + { + "expr": "ccp_nodemx_mem_request{pg_cluster=\"[[cluster]]\",pod=\"[[pod]]\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Memory request", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Container resources", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "Memory", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": "CPU (millicores)", + "logBase": 1, + "max": null, + "min": "0", + "show": true + } + ], + 
"yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": false, + "schemaVersion": 22, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(pg_cluster)", + "hide": 0, + "includeAll": false, + "index": -1, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [], + "query": "label_values(pg_cluster)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values({pg_cluster=\"[[cluster]]\"},pod)", + "hide": 0, + "includeAll": false, + "index": -1, + "label": "pod", + "multi": false, + "name": "pod", + "options": [], + "query": "label_values({pg_cluster=\"[[cluster]]\"},pod)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "30s", + "1m", + "5m", + "15m", + "30m" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "POD Details", + "uid": "4t6SO2Fik", + "variables": { + "list": [] + }, + "version": 2 +} \ No newline at end of file diff --git a/grafana/containers/postgresql_details.json b/grafana/containers/postgresql_details.json new file mode 100644 index 00000000..8ca616fb --- /dev/null +++ b/grafana/containers/postgresql_details.json @@ -0,0 +1,1740 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "PROMETHEUS", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "grafana", +
"id": "grafana", + "name": "Grafana", + "version": "6.7.4" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" + } + ], + "annotations": { + "list": [ + { + "$$hashKey": "object:48", + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "iteration": 1596817584625, + "links": [ + { + "asDropdown": false, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [], + "targetBlank": true, + "title": "", + "type": "dashboards" + } + ], + "panels": [ + { + "cacheTimeout": null, + "colorBackground": true, + "colorPrefix": false, + "colorValue": false, + "colors": [ + "#56A64B", + "#FF9830", + "#E02F44" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "dtdurations", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 2, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 27, + "interval": null, + "links": [ + { + "title": "pgBackRest", + "url": "/dashboard/db/pgBackRest" + } + ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "Time Since Last Backup:", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + 
"targets": [ + { + "expr": "min(ccp_backrest_last_incr_backup_time_since_completion_seconds{pg_cluster=\"[[cluster]]\"} < ccp_backrest_last_diff_backup_time_since_completion_seconds{pg_cluster=\"[[cluster]]\"} or ccp_backrest_last_incr_backup_time_since_completion_seconds{pg_cluster=\"[[cluster]]\"} < ccp_backrest_last_full_backup_time_since_completion_seconds{pg_cluster=\"[[cluster]]\"} or ccp_backrest_last_incr_backup_time_since_completion_seconds{pg_cluster=\"[[cluster]]\"}) ", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "86400,172800", + "timeFrom": null, + "timeShift": null, + "title": "[[cluster]] : Backup Status", + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "min" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 0, + "y": 2 + }, + "id": 8, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": 
"sum(pg_stat_activity_count{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\",state=\"active\"})*100/sum(pg_settings_max_connections{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\"})", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "", + "metric": "pg_stat_activity_count", + "refId": "A", + "step": 10 + } + ], + "thresholds": "70,90", + "title": "Active Connections", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 4, + "y": 2 + }, + "id": 2, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(pg_stat_activity_count{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\",state=\"idle in transaction\"})*100/sum(pg_settings_max_connections{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\"})", + "format": "time_series", + "intervalFactor": 2, + "refId": "B" + } + ], + "thresholds": "10,30", + "title": "Idle In Transaction", + "type": "singlestat", +
"valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 8, + "y": 2 + }, + "id": 3, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(pg_stat_activity_count{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\",state=\"idle\"})*100/sum(pg_settings_max_connections{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\"})", + "format": "time_series", + "intervalFactor": 2, + "metric": "pg_stat_activity_count", + "refId": "A", + "step": 10 + } + ], + "thresholds": "60,80", + "title": "Idle", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 2 + }, + "hiddenSeries": false, + "id": 24, + "legend": { + "alignAsTable": true, + "avg": 
false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "targetBlank": true, + "title": "CRUD_Details", + "url": "/dashboard/db/crud_details?$__all_variables" + } + ], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(pg_stat_database_xact_commit{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\"}[5m])) + sum(irate(pg_stat_database_xact_rollback{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\"}[5m]))", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "TPS", + "metric": "pg_stat_database_tup_fetched", + "refId": "A", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "TPS - [[pod]]-[[datname]]", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 2 + }, + "hiddenSeries": false, + "id": 26, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": 
false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "targetBlank": true, + "title": "CRUD_Details", + "url": "/dashboard/db/crud_details?$__all_variables" + } + ], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (state) (pg_stat_activity_count{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\",state=\"idle\"})", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "idle", + "metric": "pg_stat_database_tup_fetched", + "refId": "A", + "step": 2 + }, + { + "expr": "sum by (state) (pg_stat_activity_count{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\",state=\"idle in transaction\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Idle in txn", + "refId": "B" + }, + { + "expr": "sum by (state) (pg_stat_activity_count{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\",state=\"active\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "active", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Connections - [[pod]]-[[datname]]", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + 
"dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 12, + "x": 0, + "y": 9 + }, + "hiddenSeries": false, + "id": 12, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(ccp_database_size_bytes{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\"})/(1024*1024)", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Total : [[cluster]]-[[pod]]", + "refId": "B" + }, + { + "expr": "ccp_database_size_bytes{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",dbname=~\"[[datname]]\"}/(1024*1024)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{dbname}} ({{pod}})", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "database size - [[pod]]-[[datname]]", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decmbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 12, + "x": 12, + "y": 9 + }, + 
"hiddenSeries": false, + "id": 30, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "ccp_wal_activity_total_size_bytes{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\"}/(1024*1024)", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "WAL size MB - [[cluster]]-[[pod]]", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decmbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 14 + }, + "hiddenSeries": false, + "id": 11, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + { + "targetBlank": true, + "title": "CRUD_Details", + "url": "/dashboard/db/crud_details?$__all_variables" + } + ], + "nullPointMode": 
"null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(pg_stat_database_tup_fetched{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\"}[5m]))", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Fetched", + "metric": "pg_stat_database_tup_fetched", + "refId": "A", + "step": 2 + }, + { + "expr": "sum(irate(pg_stat_database_tup_inserted{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\"}[5m]))", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Inserted", + "metric": "pg_stat_database_tup_inserted", + "refId": "B", + "step": 2 + }, + { + "expr": "sum(irate(pg_stat_database_tup_updated{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\"}[5m]))", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Updated", + "metric": "pg_stat_database_tup_updated", + "refId": "C", + "step": 2 + }, + { + "expr": "sum(irate(pg_stat_database_tup_deleted{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Deleted", + "metric": "pg_stat_database_tup_deleted", + "refId": "D", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Row activity - [[pod]]- [[datname]]", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true 
+ } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 14 + }, + "hiddenSeries": false, + "id": 29, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "ccp_replication_lag_size_bytes{pg_cluster=\"[[cluster]]\", service!~\".*replica\"}", + "format": "time_series", + "hide": false, + "instant": false, + "intervalFactor": 1, + "legendFormat": "Replica ({{replica}}) lag bytes", + "refId": "B" + }, + { + "expr": "ccp_replication_lag_replay_time{pg_cluster=\"[[cluster]]\", service=~\".*replica\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Replica ({{ip}}) lag time", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Replication Status - [[cluster]]", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "Lag in bytes", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "dthms", + "label": "Lag time (hh:mm:ss)", + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ], + "yaxis": { + "align": true, + "alignLevel": null + } + }, + { + 
"aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 20 + }, + "hiddenSeries": false, + "id": 15, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(pg_stat_database_conflicts{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\"}[5m]))", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Conflicts", + "metric": "pg_stat_database_conflicts", + "refId": "A", + "step": 2 + }, + { + "expr": "sum(rate(pg_stat_database_deadlocks{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "DeadLocks", + "metric": "pg_stat_database_deadlocks", + "refId": "B", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Conflicts/DeadLocks - [[pod]] - [[datname]]", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, +
"dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 20 + }, + "hiddenSeries": false, + "id": 28, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "pg_stat_database_blks_hit{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\", datid!=\"0\", datname!~\"template0\", datname!~\"template1\"}*100/(pg_stat_database_blks_hit{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\", datname!~\"template0\", datname!~\"template1\"} + pg_stat_database_blks_read{pg_cluster=\"[[cluster]]\",pod=~\"[[pod]]\",datname!~\"template0\", datname!~\"template1\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{datname}} - ({{pod}})", + "refId": "A" + }, + { + "expr": "sum(pg_stat_database_blks_hit{pg_cluster=\"[[cluster]]\"})*100/sum(pg_stat_database_blks_hit{pg_cluster=\"[[cluster]]\"} + pg_stat_database_blks_read{pg_cluster=\"[[cluster]]\"})", + "format": "time_series", + "hide": true, + "intervalFactor": 2, + "legendFormat": "Overall", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Cache Hit Ratio - [[pod]]-[[datname]]", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short",
+ "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 26 + }, + "hiddenSeries": false, + "id": 14, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(ccp_stat_bgwriter_buffers_alloc{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Allocated", + "metric": "pg_stat_bgwriter_buffers_alloc", + "refId": "A", + "step": 2 + }, + { + "expr": "sum(ccp_stat_bgwriter_buffers_backend{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Backend", + "metric": "pg_stat_bgwriter_buffers_backend", + "refId": "B", + "step": 2 + }, + { + "expr": "sum(ccp_stat_bgwriter_buffers_backend_fsync{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "FSync", + "metric": "pg_stat_bgwriter_buffers_backend_fsync", + "refId": "C", + "step": 2 + }, + { + "expr": "sum(ccp_stat_bgwriter_buffers_checkpoint{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "CheckPoint", + "metric": "pg_stat_bgwriter_buffers_checkpoint", + "refId": "D", + "step": 2 + }, + { + "expr": 
"sum(ccp_stat_bgwriter_buffers_clean{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Clean", + "metric": "pg_stat_bgwriter_buffers_clean", + "refId": "E", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Buffers - [[pod]]", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 26 + }, + "hiddenSeries": false, + "id": 13, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(pg_stat_database_xact_commit{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Commit", + "metric": "pg_stat_database_xact_commit", + "refId": "A", + "step": 2 + }, + { + "expr": 
"sum(irate(pg_stat_database_xact_rollback{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Rollback", + "refId": "B", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Commit & Rollback", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 32 + }, + "id": 17, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (mode) (pg_locks_count{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\",mode=\"accessexclusivelock\"})", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{mode}}", + "refId": "A", + "step": 2 + }, + { + "expr": "sum by (mode) (pg_locks_count{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\",mode=\"exclusivelock\"})", + "format": 
"time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{mode}}", + "refId": "C", + "step": 2 + }, + { + "expr": "sum by (mode) (pg_locks_count{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\",mode=\"rowexclusivelock\"})", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{mode}}", + "refId": "D", + "step": 2 + }, + { + "expr": "sum by (mode) (pg_locks_count{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\",mode=\"sharerowexclusivelock\"})", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{mode}}", + "refId": "G", + "step": 2 + }, + { + "expr": "sum by (mode) (pg_locks_count{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\",mode=\"shareupdateexclusivelock\"})", + "format": "time_series", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{mode}}", + "refId": "H", + "step": 2 + }, + { + "expr": "sum by (mode) (pg_locks_count{pg_cluster=~\"[[cluster]]\",pod=~\"[[pod]]\",datname=~\"[[datname]]\",mode=\"accesssharelock\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{mode}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Locks - [[pod]] - [[datname]]", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": false, + "schemaVersion": 22, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allFormat": "glob", + "allValue": null, + "current": {}, 
+ "datasource": "${DS_PROMETHEUS}", + "definition": "", + "hide": 0, + "includeAll": false, + "index": -1, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [], + "query": "label_values(pg_cluster)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allFormat": "glob", + "allValue": ".*", + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values({pg_cluster=\"[[cluster]]\"},pod)", + "hide": 0, + "includeAll": true, + "index": -1, + "label": "pod", + "multi": true, + "name": "pod", + "options": [], + "query": "label_values({pg_cluster=\"[[cluster]]\"},pod)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allFormat": "glob", + "allValue": ".*", + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values({pg_cluster=\"[[cluster]]\" ,pod=\"[[pod]]\"},dbname)", + "hide": 0, + "includeAll": true, + "index": -1, + "label": "Database", + "multi": true, + "name": "datname", + "options": [], + "query": "label_values({pg_cluster=\"[[cluster]]\" ,pod=\"[[pod]]\"},dbname)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-5m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "PostgreSQLDetails", + "uid": "3NfGQ_6Wk", + "variables": { + "list": [] + }, + "version": 1 +} \ No newline at end of file diff --git a/grafana/containers/postgresql_overview.json 
b/grafana/containers/postgresql_overview.json new file mode 100644 index 00000000..528fa5a6 --- /dev/null +++ b/grafana/containers/postgresql_overview.json @@ -0,0 +1,229 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "PROMETHEUS", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "6.7.4" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" + } + ], + "annotations": { + "list": [ + { + "$$hashKey": "object:40", + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "iteration": 1596817539605, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "colorBackground": true, + "colorPostfix": false, + "colorPrefix": false, + "colorValue": false, + "colors": [ + "#bf1b00", + "#eab839", + "#56A64B" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "short", + "gauge": { + "maxValue": 2, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": false + }, + "gridPos": { + "h": 3, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 1, + "interval": null, + "links": [ + { + "targetBlank": true, + "title": "PostgreSQLDetails", + "url": "dashboard/db/postgresqldetails?$__url_time_range&$__all_variables" + } + ], + "mappingType": 2, + "mappingTypes": [ + { + "$$hashKey": "object:78", + "$hashKey": "object:295", + "name": "value to text", + "value": 1 + }, + { + "$$hashKey": "object:79", + "$hashKey": "object:296", + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "maxPerRow": 2, + "nullPointMode": "connected", + 
"nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "$$hashKey": "object:81", + "from": "0", + "text": "DOWN", + "to": "99" + }, + { + "$$hashKey": "object:82", + "from": "100", + "text": "Standalone Cluster", + "to": "199" + }, + { + "$$hashKey": "object:83", + "from": "200", + "text": "HA CLUSTER", + "to": "1000" + } + ], + "repeat": "cluster", + "repeatDirection": "h", + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "$hashKey": "object:243", + "expr": "sum(pg_up{pg_cluster=\"$cluster\"})*100+sum(ccp_is_in_recovery_status{pg_cluster=\"$cluster\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{cluster}}", + "metric": "up", + "refId": "A", + "step": 2 + } + ], + "thresholds": "10,100", + "title": "$cluster - Overview", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [], + "valueName": "current" + } + ], + "refresh": "30s", + "schemaVersion": 22, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allFormat": "glob", + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(pg_cluster)", + "hide": 2, + "includeAll": true, + "index": -1, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [], + "query": "label_values(pg_cluster)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-5m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": 
"PostgreSQL Overview", + "uid": "pgoverview", + "variables": { + "list": [] + }, + "version": 1 +} \ No newline at end of file diff --git a/grafana/containers/postgresql_service_health_overview.json b/grafana/containers/postgresql_service_health_overview.json new file mode 100644 index 00000000..17172799 --- /dev/null +++ b/grafana/containers/postgresql_service_health_overview.json @@ -0,0 +1,576 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "PROMETHEUS", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "6.7.4" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + } + ], + "annotations": { + "list": [ + { + "$$hashKey": "object:270", + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "iteration": 1596817557729, + "links": [ + { + "asDropdown": false, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [], + "targetBlank": true, + "title": "", + "type": "dashboards" + } + ], + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 5, + "gridPos": { + "h": 7, + "w": 11, + "x": 0, + "y": 0 + }, + "hiddenSeries": false, + "id": 6, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + 
"percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "ccp_connection_stats_total{service=\"[[service_name]]\"}*100/ccp_connection_stats_max_connections{service=\"[[service_name]]\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "connections", + "refId": "C" + }, + { + "expr": "(ccp_nodemx_data_disk_total_bytes{service=\"[[service_name]]\"}-ccp_nodemx_data_disk_available_bytes{service=\"[[service_name]]\"})*100/ccp_nodemx_data_disk_total_bytes{service=\"[[service_name]]\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Mount:{{mount_point}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Saturation (pct used)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "percent", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "cacheTimeout": null, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 5, + "fillGradient": 5, + "gridPos": { + "h": 7, + "w": 11, + "x": 11, + "y": 0 + }, + "hiddenSeries": false, + "id": 18, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + 
"percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(ccp_stat_database_xact_commit{service=\"[[service_name]]\"}[1m])) + sum(irate(ccp_stat_database_xact_rollback{service=\"[[service_name]]\"}[1m]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Transactions per minute", + "refId": "A" + }, + { + "expr": "sum(irate(ccp_query_count{service=\"[[service_name]]\"}[1m])) + sum(irate(ccp_query_count{service=\"[[service_name]]\"}[1m]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Queries per minute", + "refId": "B" + }, + { + "expr": "ccp_connection_stats_active{service=\"[[service_name]]\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Active connections", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Traffic", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "description": "Errors", + "fill": 1, + "fillGradient": 5, + "gridPos": { + "h": 7, + "w": 11, + "x": 0, + "y": 7 + }, + "hiddenSeries": false, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + 
"lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(ccp_stat_database_xact_rollback{service=\"[[service_name]]\"}[1m]))", + "format": "time_series", + "hide": true, + "intervalFactor": 1, + "legendFormat": "Rollbacks", + "refId": "A" + }, + { + "expr": "sum(irate(ccp_stat_database_deadlocks{service=\"[[service_name]]\"}[1m]))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "Deadlock ", + "refId": "D" + }, + { + "expr": "sum(irate(ccp_stat_database_conflicts{service=\"[[service_name]]\"}[1m]))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "Conflicts", + "refId": "B" + }, + { + "expr": "pg_exporter_last_scrape_error{service=\"[[service_name]]\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "scrape error", + "refId": "C" + }, + { + "expr": "clamp_max(ccp_archive_command_status_seconds_since_last_fail{service=\"[[service_name]]\"},1)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "archive error", + "refId": "E" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Errors", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "short", + "label": "", + "logBase": 1, + "max": "512", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": 
{}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 5, + "gridPos": { + "h": 7, + "w": 11, + "x": 11, + "y": 7 + }, + "hiddenSeries": false, + "id": 10, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ + {} + ], + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "ccp_connection_stats_max_query_time{service=\"[[service_name]]\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Max", + "refId": "A" + }, + { + "expr": "ccp_query_avg_latency{service=\"[[service_name]]\"}", + "format": "time_series", + "hide": true, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Avg - ({{dbname}})", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": "5s", + "schemaVersion": 22, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + 
"definition": "label_values(service)", + "hide": 0, + "includeAll": false, + "index": -1, + "label": "service_name", + "multi": false, + "name": "service_name", + "options": [], + "query": "label_values(service)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "PostgreSQL Service Health Overview", + "uid": "pRyvrRgGz", + "variables": { + "list": [] + }, + "version": 1 +} \ No newline at end of file diff --git a/grafana/containers/prometheus_alerts.json b/grafana/containers/prometheus_alerts.json new file mode 100644 index 00000000..b20cab01 --- /dev/null +++ b/grafana/containers/prometheus_alerts.json @@ -0,0 +1,782 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "PROMETHEUS", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "6.7.4" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "table", + "name": "Table", + "version": "" + } + ], + "annotations": { + "list": [ + { + "$$hashKey": "object:212", + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "links": [ + { + "asDropdown": false, + "icon": "external link", + "includeVars": true, + "keepTime": true, + 
"tags": [], + "targetBlank": true, + "title": "", + "type": "dashboards" + } + ], + "panels": [ + { + "columns": [], + "datasource": "${DS_PROMETHEUS}", + "fontSize": "100%", + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, + "links": [], + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "styles": [ + { + "alias": "Time", + "align": "auto", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "link": false, + "pattern": "Time", + "type": "date" + }, + { + "alias": "", + "align": "auto", + "colorMode": "row", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": false, + "mappingType": 1, + "pattern": "severity_num", + "thresholds": [ + "200", + "300" + ], + "type": "string", + "unit": "none" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "__name__", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "alertstate", + "thresholds": [], + "type": "hidden", + "unit": "short", + "valueMaps": [] + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + 
"rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "alert_value", + "thresholds": [], + "type": "string", + "unit": "none" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "crunchy_collect", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "crunchy_pgha_scope", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "job", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "pg_cluster_id", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "pgouser", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 
129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "pod_template_hash", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "severity_num", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "vendor", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "kubernetes_pod_name", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "workflowid", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "name", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 
0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "service", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "pgo_pg_database", + "thresholds": [], + "type": "hidden", + "unit": "short" + } + ], + "targets": [ + { + "expr": "ALERTS{alertstate=\"firing\"}", + "format": "table", + "instant": true, + "interval": "", + "intervalFactor": 4, + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "title": "Active Alerts", + "transform": "table", + "type": "table" + }, + { + "columns": [], + "datasource": "${DS_PROMETHEUS}", + "fontSize": "100%", + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 7 + }, + "id": 3, + "links": [], + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "styles": [ + { + "alias": "Time", + "align": "auto", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "link": false, + "pattern": "Time", + "type": "date" + }, + { + "alias": "", + "align": "auto", + "colorMode": "row", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": false, + "mappingType": 1, + "pattern": "severity_num", + "thresholds": [ + "200", + "300" + ], + "type": "string", + "unit": "none" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "__name__", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + 
"colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "alertstate", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "alert_value", + "thresholds": [], + "type": "string", + "unit": "none" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "crunchy_collect", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "crunchy_pgha_scope", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "job", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, 
+ "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "kubernetes_pod_name", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "name", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "pg_cluster_id", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "pgo_pg_database", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "pod_template_hash", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "vendor", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + 
"colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "workflowid", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "pgouser", + "thresholds": [], + "type": "hidden", + "unit": "short" + } + ], + "targets": [ + { + "expr": "ALERTS{alertstate=\"firing\"}", + "format": "table", + "instant": false, + "interval": "", + "intervalFactor": 4, + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": "1w", + "title": "Alert History (1 week)", + "transform": "table", + "type": "table" + } + ], + "refresh": "5m", + "schemaVersion": 22, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Prometheus Alerts", + "uid": "NzvdSiSMz", + "variables": { + "list": [] + }, + "version": 1 +} \ No newline at end of file diff --git a/prometheus/crunchy-prometheus.yml.containers b/prometheus/crunchy-prometheus.yml.containers new file mode 100644 index 00000000..b6538943 --- /dev/null +++ b/prometheus/crunchy-prometheus.yml.containers @@ -0,0 +1,66 @@ +--- +global: + scrape_interval: 15s + scrape_timeout: 15s + evaluation_interval: 5s + +scrape_configs: +- job_name: 'crunchy-collect' + kubernetes_sd_configs: + - role: pod + + relabel_configs: + - source_labels: [__meta_kubernetes_pod_label_crunchy_collect] + action: keep + regex: true + - source_labels: 
[__meta_kubernetes_pod_container_port_number] + action: drop + regex: 5432 + - source_labels: [__meta_kubernetes_pod_container_port_number] + action: drop + regex: 10000 + - source_labels: [__meta_kubernetes_pod_container_port_number] + action: drop + regex: 8009 + - source_labels: [__meta_kubernetes_pod_container_port_number] + action: drop + regex: 2022 + - source_labels: [__meta_kubernetes_namespace] + action: replace + target_label: kubernetes_namespace + - source_labels: [__meta_kubernetes_pod_name] + target_label: pod + - source_labels: [__meta_kubernetes_pod_name] + regex: (^[^-]*).* + target_label: instance + replacement: '$1' + - source_labels: [__meta_kubernetes_namespace,instance] + target_label: pg_cluster + separator: ':' + replacement: '$1$2' + - source_labels: [__meta_kubernetes_pod_ip] + target_label: ip + replacement: '$1' + - source_labels: [__meta_kubernetes_pod_label_deployment_name] + target_label: deployment + replacement: '$1' + - source_labels: [__meta_kubernetes_pod_label_service_name] + target_label: service + replacement: '$1' + - source_labels: [__meta_kubernetes_pod_label_role] + target_label: role + replacement: '$1' + - source_labels: [dbname] + target_label: dbname + replacement: '$1' + - source_labels: [relname] + target_label: relname + replacement: '$1' + - source_labels: [schemaname] + target_label: schemaname + replacement: '$1' + - target_label: exp_type + replacement: 'pg' + +rule_files: + - /conf/crunchy-alert-rules.yml