Skip to content

Commit

Permalink
exporter queries, prometheus config file and dashboards for container…
Browse files Browse the repository at this point in the history
… deployments (#188) (#190)

* exporter queries, prometheus config file and dashboards

* same dashboards. But exported from Grafana GUI

* fix file permissions

* add links and fix units on network panel

* rename the 4 golden signals dashboard and add disk info to the saturation panel

* change metric names from ccp_container to ccp_nodemx, update dashboards with new names, remove invalid panel links

* rename queries_containers.yml to queries_nodemx.yml

* fix the description of process count metric

* exported dashboards to use externally

Co-authored-by: Pramodh Mereddy <[email protected]>
  • Loading branch information
sharmay and pmereddy1 authored Aug 7, 2020
1 parent c45f65c commit 9697c0d
Show file tree
Hide file tree
Showing 9 changed files with 5,083 additions and 0 deletions.
156 changes: 156 additions & 0 deletions exporter/postgres/queries_nodemx.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
###
#
# Begin File: pod_metrics.yml
#
###
ccp_nodemx_network:
query: "SELECT interface,tx_bytes,tx_packets, rx_bytes,rx_packets from monitor.proc_network_stats()"
metrics:
- interface:
usage: "LABEL"
- tx_bytes:
usage: "GAUGE"
description: "Number of bytes transmitted"
- tx_packets:
usage: "GAUGE"
description: "Number of packets transmitted"
- rx_bytes:
usage: "GAUGE"
description: "Number of bytes received"
- rx_packets:
usage: "GAUGE"
description: "Number of packets received"

ccp_nodemx_process:
query: "SELECT monitor.cgroup_process_count() as count"
metrics:
- count:
usage: "GAUGE"
description: "Total number of database processes"


ccp_nodemx_mem:
query: "SELECT monitor.kdapi_scalar_bigint('mem_request') as request, monitor.kdapi_scalar_bigint('mem_limit') as limit, (select val from monitor.cgroup_setof_kv('memory.stat') where key='cache') as cache, (select val from monitor.cgroup_setof_kv('memory.stat') where key='rss') as rss, (select val from monitor.cgroup_setof_kv('memory.stat') where key='shmem') as shmem, (select val from monitor.cgroup_setof_kv('memory.stat') where key='mapped_file') as mapped_file, (select val from monitor.cgroup_setof_kv('memory.stat') where key='dirty') as dirty, (select val from monitor.cgroup_setof_kv('memory.stat') where key='active_anon') as active_anon, (select val from monitor.cgroup_setof_kv('memory.stat') where key='inactive_anon') as inactive_anon, (select val from monitor.cgroup_setof_kv('memory.stat') where key='active_file') as active_file, (select val from monitor.cgroup_setof_kv('memory.stat') where key='inactive_file') as inactive_file"
metrics:
- request:
usage: "GAUGE"
description: "Memory request value in bytes"
- limit:
usage: "GAUGE"
description: "Memory limit value in bytes"
- cache:
usage: "GAUGE"
description: "Total bytes of page cache memory"
- rss:
usage: "GAUGE"
description: "Total bytes of anonymous and swap cache memory"
- shmem:
usage: "GAUGE"
description: "Total bytes of shared memory"
- mapped_file:
usage: "GAUGE"
description: "Total bytes of mapped file (includes tmpfs/shmem)"
- mapped_file:
usage: "GAUGE"
description: "Total bytes of mapped file (includes tmpfs/shmem)"
- dirty:
usage: "GAUGE"
description: "Total bytes that are waiting to get written back to the disk"
- active_anon:
usage: "GAUGE"
description: "Total bytes of anonymous and swap cache memory on active LRU list"
- inactive_anon:
usage: "GAUGE"
description: "Total bytes of anonymous and swap cache memory on inactive LRU list"
- active_file:
usage: "GAUGE"
description: "Total bytes of file-backed memory on active LRU list"
- inactive_file:
usage: "GAUGE"
description: "Total bytes of file-backed memory on inactive LRU list"


ccp_nodemx_cpu:
query: "SELECT monitor.kdapi_scalar_bigint('cpu_request') as request, monitor.kdapi_scalar_bigint('cpu_limit') as limit"
metrics:
- request:
usage: "GAUGE"
description: "CPU request value in milli cores"
- limit:
usage: "GAUGE"
description: "CPU limit value in milli cores"

ccp_nodemx_cpucfs:
query: "SELECT monitor.cgroup_scalar_bigint('cpu.cfs_period_us') as period_us, monitor.cgroup_scalar_bigint('cpu.cfs_quota_us') as quota_us"
metrics:
- period_us:
usage: "GAUGE"
description: "the total available run-time within a period (in microseconds)"
- quota_us:
usage: "GAUGE"
description: "the length of a period (in microseconds)"

ccp_nodemx_cpuacct:
query: "SELECT monitor.cgroup_scalar_bigint('cpuacct.usage') as usage"
metrics:
- usage:
usage: "GAUGE"
description: "CPU usage in nanoseconds"

ccp_nodemx_cpustat:
query: "select (SELECT val as nr_periods FROM monitor.cgroup_setof_kv('cpu.stat') where key='nr_periods'), (SELECT val as nr_throttled FROM monitor.cgroup_setof_kv('cpu.stat') where key='nr_throttled'), (SELECT val as throttled_time FROM monitor.cgroup_setof_kv('cpu.stat') where key='throttled_time')"
metrics:
- nr_threads:
usage: "GAUGE"
description: "number of periods that any thread was runnable"
- nr_throttled:
usage: "GAUGE"
description: "number of runnable periods in which the application used its entire quota and was throttled"
- throttled_time:
usage: "GAUGE"
description: "sum total amount of time individual threads within the monitor.cgroup were throttled"


ccp_nodemx_data_disk:
query: "SELECT mount_point, fs_type,reads_completed_successfully as reads, sectors_read, writes_completed as writes,sectors_written, total_bytes,available_bytes,total_file_nodes,free_file_nodes
FROM monitor.proc_mountinfo() m
JOIN monitor.proc_diskstats() d USING (major_number, minor_number)
JOIN monitor.fsinfo(m.mount_point) f USING (major_number, minor_number)
WHERE m.mount_point like '/pg%' or m.mount_point like '/tablespace%'"
metrics:
- mount_point:
usage: "LABEL"
description: "mount point"
- fs_type:
usage: "GAUGE"
description: "File system type"
- reads:
usage: "GAUGE"
description: "Total reads"
- sectors_read:
usage: "GAUGE"
description: "Total sectors read"
- writes:
usage: "GAUGE"
description: "Total writes"
- sectors_written:
usage: "GAUGE"
description: "Total sectors writen"
- total_bytes:
usage: "GAUGE"
description: "Size in bytes"
- available_bytes:
usage: "GAUGE"
description: "Available size in bytes"
- total_file_nodes:
usage: "GAUGE"
description: "Total file nodes"
- free_file_nodes:
usage: "GAUGE"
description: "Available file nodes"

###
#
# End File: pod_metrics.yml
#
###
Loading

0 comments on commit 9697c0d

Please sign in to comment.