-
Notifications
You must be signed in to change notification settings - Fork 93
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge development for v4.4-RC6 (#209)
* Trim whitespace (#208) * trim whitespace in: crunchy-alert-rules-node.yml.example * trim whitespace in: crunchy-alert-rules-etcd.yml.example * trim whitespace in: crunchy-alert-rules-blackbox.yml.example * trim whitespaces in: crunchy-alert-rules-pg.yml.containers.example * trim whitespace in: crunchy-alert-rules-pg.yml.example * Add new files for PG13 support (#201) * Add metric for monitoring for blocked queries (#207) * Add pg_stat_statements metrics & dashboard (#200) Co-authored-by: Joseph Mckulka <[email protected]> Co-authored-by: Keith Fiske <[email protected]>
- Loading branch information
1 parent
328f831
commit 41ef700
Showing
35 changed files
with
1,764 additions
and
528 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
83 changes: 0 additions & 83 deletions
83
exporter/postgres/crunchy-postgres-exporter-pg94-el6.service
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
### | ||
# | ||
# Begin File: queries_pg13.yml | ||
# | ||
### | ||
|
||
ccp_connection_stats: | ||
query: "select ((total - idle) - idle_in_txn) as active | ||
, total | ||
, idle | ||
, idle_in_txn | ||
, (select coalesce(extract(epoch from (max(now() - state_change))),0) from pg_catalog.pg_stat_activity where state = 'idle in transaction') as max_idle_in_txn_time | ||
, (select coalesce(extract(epoch from (max(now() - query_start))),0) from pg_catalog.pg_stat_activity where backend_type = 'client backend' and state <> 'idle' ) as max_query_time | ||
, (select coalesce(extract(epoch from (max(now() - query_start))),0) from pg_catalog.pg_stat_activity where backend_type = 'client backend' and wait_event_type = 'Lock' ) as max_blocked_query_time | ||
, max_connections | ||
from ( | ||
select count(*) as total | ||
, coalesce(sum(case when state = 'idle' then 1 else 0 end),0) as idle | ||
, coalesce(sum(case when state = 'idle in transaction' then 1 else 0 end),0) as idle_in_txn from pg_catalog.pg_stat_activity) x | ||
join (select setting::float AS max_connections FROM pg_settings WHERE name = 'max_connections') xx ON (true);" | ||
metrics: | ||
- active: | ||
usage: "GAUGE" | ||
description: "Total non-idle connections" | ||
- total: | ||
usage: "GAUGE" | ||
description: "Total idle and non-idle connections" | ||
- idle: | ||
usage: "GAUGE" | ||
description: "Total idle connections" | ||
- idle_in_txn: | ||
usage: "GAUGE" | ||
description: "Total idle in transaction connections" | ||
- max_idle_in_txn_time: | ||
usage: "GAUGE" | ||
description: "Length of time in seconds of the longest idle in transaction session" | ||
- max_query_time: | ||
usage: "GAUGE" | ||
description: "Length of time in seconds of the longest running query" | ||
- max_blocked_query_time: | ||
usage: "GAUGE" | ||
description: "Length of time in seconds of the longest running query that has been blocked by a heavyweight lock" | ||
- max_connections: | ||
usage: "GAUGE" | ||
description: "Value of max_connections for the monitored database" | ||
|
||
|
||
ccp_replication_lag: | ||
query: "SELECT | ||
CASE | ||
WHEN pg_last_wal_receive_lsn() = pg_last_wal_replay_lsn() THEN 0 | ||
ELSE EXTRACT (EPOCH FROM now() - pg_last_xact_replay_timestamp())::INTEGER | ||
END | ||
AS replay_time" | ||
metrics: | ||
- replay_time: | ||
usage: "GAUGE" | ||
description: "Length of time since the last transaction was replayed on replica. Will always increase if no writes on primary." | ||
|
||
|
||
ccp_replication_lag_size: | ||
query: "SELECT client_addr as replica | ||
, client_hostname as replica_hostname | ||
, client_port as replica_port | ||
, pg_wal_lsn_diff(sent_lsn, replay_lsn) as bytes | ||
FROM pg_catalog.pg_stat_replication" | ||
metrics: | ||
- replica: | ||
usage: "LABEL" | ||
description: "Replica address" | ||
- replica_hostname: | ||
usage: "LABEL" | ||
description: "Replica hostname" | ||
- replica_port: | ||
usage: "LABEL" | ||
description: "Replica port" | ||
- bytes: | ||
usage: "GAUGE" | ||
description: "Replication lag in bytes" | ||
|
||
|
||
ccp_replication_slots: | ||
query: "SELECT slot_name, active::int, pg_wal_lsn_diff(pg_current_wal_insert_lsn(), restart_lsn) AS retained_bytes FROM pg_catalog.pg_replication_slots" | ||
metrics: | ||
- slot_name: | ||
usage: "LABEL" | ||
description: "Name of replication slot" | ||
- active: | ||
usage: "GAUGE" | ||
description: "Active state of slot. 1 = true. 0 = false." | ||
- retained_bytes: | ||
usage: "GAUGE" | ||
description: "The amount of WAL (in bytes) being retained for this slot" | ||
|
||
|
||
ccp_wal_activity: | ||
query: "SELECT last_5_min_size_bytes, | ||
(SELECT COALESCE(sum(size),0) FROM pg_catalog.pg_ls_waldir()) AS total_size_bytes | ||
FROM (SELECT COALESCE(sum(size),0) AS last_5_min_size_bytes FROM pg_catalog.pg_ls_waldir() WHERE modification > CURRENT_TIMESTAMP - '5 minutes'::interval) x;" | ||
metrics: | ||
- last_5_min_size_bytes: | ||
usage: "GAUGE" | ||
description: "Current size in bytes of the last 5 minutes of WAL generation. Includes recycled WALs." | ||
- total_size_bytes: | ||
usage: "GAUGE" | ||
description: "Current size in bytes of the WAL directory" | ||
|
||
|
||
ccp_data_checksum_failure: | ||
query: "SELECT datname AS dbname | ||
, checksum_failures AS count | ||
, coalesce(extract(epoch from (now() - checksum_last_failure)), 0) AS time_since_last_failure_seconds | ||
FROM pg_catalog.pg_stat_database;" | ||
metrics: | ||
- dbname: | ||
usage: "LABEL" | ||
description: "Database name" | ||
- count: | ||
usage: "GAUGE" | ||
description: "Total number of checksum failures on this database" | ||
- time_since_last_failure_seconds: | ||
usage: "GAUGE" | ||
description: "Time interval in seconds since the last checksum failure was encountered" | ||
|
||
|
||
ccp_pg_hba_checksum: | ||
query: "SELECT monitor.pg_hba_checksum() AS status" | ||
metrics: | ||
- status: | ||
usage: "GAUGE" | ||
description: "Value of checksum monitioring status for pg_catalog.pg_hba_file_rules (pg_hba.conf). 0 = valid config. 1 = settings changed. To reset current config to valid after alert, run monitor.pg_hba_checksum_set_valid()." | ||
|
||
|
||
### | ||
# | ||
# End File: queries_pg13.yml | ||
# | ||
### |
Oops, something went wrong.