-
Notifications
You must be signed in to change notification settings - Fork 93
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
exporter queries, prometheus config file and dashboards for container…
… deployments (#188) (#190) * exporter queries, prometheus config file and dashboards * same dashboards. But exported from Grafana GUI * fix file permissions * add links and fix units on network panel * rename the 4 golden signals dashboard and add disk info to the saturation panel * change metric names from ccp_container to ccp_nodemx, update dashboards with new names, remove invalid panel links * rename queries_containers.yml to queries_nodemx.yml * fix the description of process count metric * exported dashboards to use externally Co-authored-by: Pramodh Mereddy <[email protected]>
- Loading branch information
Showing
9 changed files
with
5,083 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,156 @@ | ||
### | ||
# | ||
# Begin File: pod_metrics.yml | ||
# | ||
### | ||
ccp_nodemx_network: | ||
query: "SELECT interface,tx_bytes,tx_packets, rx_bytes,rx_packets from monitor.proc_network_stats()" | ||
metrics: | ||
- interface: | ||
usage: "LABEL" | ||
- tx_bytes: | ||
usage: "GAUGE" | ||
description: "Number of bytes transmitted" | ||
- tx_packets: | ||
usage: "GAUGE" | ||
description: "Number of packets transmitted" | ||
- rx_bytes: | ||
usage: "GAUGE" | ||
description: "Number of bytes received" | ||
- rx_packets: | ||
usage: "GAUGE" | ||
description: "Number of packets received" | ||
|
||
ccp_nodemx_process: | ||
query: "SELECT monitor.cgroup_process_count() as count" | ||
metrics: | ||
- count: | ||
usage: "GAUGE" | ||
description: "Total number of database processes" | ||
|
||
|
||
ccp_nodemx_mem: | ||
query: "SELECT monitor.kdapi_scalar_bigint('mem_request') as request, monitor.kdapi_scalar_bigint('mem_limit') as limit, (select val from monitor.cgroup_setof_kv('memory.stat') where key='cache') as cache, (select val from monitor.cgroup_setof_kv('memory.stat') where key='rss') as rss, (select val from monitor.cgroup_setof_kv('memory.stat') where key='shmem') as shmem, (select val from monitor.cgroup_setof_kv('memory.stat') where key='mapped_file') as mapped_file, (select val from monitor.cgroup_setof_kv('memory.stat') where key='dirty') as dirty, (select val from monitor.cgroup_setof_kv('memory.stat') where key='active_anon') as active_anon, (select val from monitor.cgroup_setof_kv('memory.stat') where key='inactive_anon') as inactive_anon, (select val from monitor.cgroup_setof_kv('memory.stat') where key='active_file') as active_file, (select val from monitor.cgroup_setof_kv('memory.stat') where key='inactive_file') as inactive_file" | ||
metrics: | ||
- request: | ||
usage: "GAUGE" | ||
description: "Memory request value in bytes" | ||
- limit: | ||
usage: "GAUGE" | ||
description: "Memory limit value in bytes" | ||
- cache: | ||
usage: "GAUGE" | ||
description: "Total bytes of page cache memory" | ||
- rss: | ||
usage: "GAUGE" | ||
description: "Total bytes of anonymous and swap cache memory" | ||
- shmem: | ||
usage: "GAUGE" | ||
description: "Total bytes of shared memory" | ||
- mapped_file: | ||
usage: "GAUGE" | ||
description: "Total bytes of mapped file (includes tmpfs/shmem)" | ||
- mapped_file: | ||
usage: "GAUGE" | ||
description: "Total bytes of mapped file (includes tmpfs/shmem)" | ||
- dirty: | ||
usage: "GAUGE" | ||
description: "Total bytes that are waiting to get written back to the disk" | ||
- active_anon: | ||
usage: "GAUGE" | ||
description: "Total bytes of anonymous and swap cache memory on active LRU list" | ||
- inactive_anon: | ||
usage: "GAUGE" | ||
description: "Total bytes of anonymous and swap cache memory on inactive LRU list" | ||
- active_file: | ||
usage: "GAUGE" | ||
description: "Total bytes of file-backed memory on active LRU list" | ||
- inactive_file: | ||
usage: "GAUGE" | ||
description: "Total bytes of file-backed memory on inactive LRU list" | ||
|
||
|
||
ccp_nodemx_cpu: | ||
query: "SELECT monitor.kdapi_scalar_bigint('cpu_request') as request, monitor.kdapi_scalar_bigint('cpu_limit') as limit" | ||
metrics: | ||
- request: | ||
usage: "GAUGE" | ||
description: "CPU request value in milli cores" | ||
- limit: | ||
usage: "GAUGE" | ||
description: "CPU limit value in milli cores" | ||
|
||
ccp_nodemx_cpucfs: | ||
query: "SELECT monitor.cgroup_scalar_bigint('cpu.cfs_period_us') as period_us, monitor.cgroup_scalar_bigint('cpu.cfs_quota_us') as quota_us" | ||
metrics: | ||
- period_us: | ||
usage: "GAUGE" | ||
description: "the total available run-time within a period (in microseconds)" | ||
- quota_us: | ||
usage: "GAUGE" | ||
description: "the length of a period (in microseconds)" | ||
|
||
ccp_nodemx_cpuacct: | ||
query: "SELECT monitor.cgroup_scalar_bigint('cpuacct.usage') as usage" | ||
metrics: | ||
- usage: | ||
usage: "GAUGE" | ||
description: "CPU usage in nanoseconds" | ||
|
||
ccp_nodemx_cpustat: | ||
query: "select (SELECT val as nr_periods FROM monitor.cgroup_setof_kv('cpu.stat') where key='nr_periods'), (SELECT val as nr_throttled FROM monitor.cgroup_setof_kv('cpu.stat') where key='nr_throttled'), (SELECT val as throttled_time FROM monitor.cgroup_setof_kv('cpu.stat') where key='throttled_time')" | ||
metrics: | ||
- nr_threads: | ||
usage: "GAUGE" | ||
description: "number of periods that any thread was runnable" | ||
- nr_throttled: | ||
usage: "GAUGE" | ||
description: "number of runnable periods in which the application used its entire quota and was throttled" | ||
- throttled_time: | ||
usage: "GAUGE" | ||
description: "sum total amount of time individual threads within the monitor.cgroup were throttled" | ||
|
||
|
||
ccp_nodemx_data_disk: | ||
query: "SELECT mount_point, fs_type,reads_completed_successfully as reads, sectors_read, writes_completed as writes,sectors_written, total_bytes,available_bytes,total_file_nodes,free_file_nodes | ||
FROM monitor.proc_mountinfo() m | ||
JOIN monitor.proc_diskstats() d USING (major_number, minor_number) | ||
JOIN monitor.fsinfo(m.mount_point) f USING (major_number, minor_number) | ||
WHERE m.mount_point like '/pg%' or m.mount_point like '/tablespace%'" | ||
metrics: | ||
- mount_point: | ||
usage: "LABEL" | ||
description: "mount point" | ||
- fs_type: | ||
usage: "GAUGE" | ||
description: "File system type" | ||
- reads: | ||
usage: "GAUGE" | ||
description: "Total reads" | ||
- sectors_read: | ||
usage: "GAUGE" | ||
description: "Total sectors read" | ||
- writes: | ||
usage: "GAUGE" | ||
description: "Total writes" | ||
- sectors_written: | ||
usage: "GAUGE" | ||
description: "Total sectors writen" | ||
- total_bytes: | ||
usage: "GAUGE" | ||
description: "Size in bytes" | ||
- available_bytes: | ||
usage: "GAUGE" | ||
description: "Available size in bytes" | ||
- total_file_nodes: | ||
usage: "GAUGE" | ||
description: "Total file nodes" | ||
- free_file_nodes: | ||
usage: "GAUGE" | ||
description: "Available file nodes" | ||
|
||
### | ||
# | ||
# End File: pod_metrics.yml | ||
# | ||
### |
Oops, something went wrong.