From f578f83bd45d82a6a115fef0f32154f40e66ec00 Mon Sep 17 00:00:00 2001 From: Keith Fiske Date: Tue, 22 Sep 2020 11:12:55 -0400 Subject: [PATCH] Add metric for monitoring for blocked queries (#207) --- exporter/postgres/queries_pg10.yml | 4 ++++ exporter/postgres/queries_pg11.yml | 4 ++++ exporter/postgres/queries_pg12.yml | 4 ++++ exporter/postgres/queries_pg13.yml | 4 ++++ exporter/postgres/queries_pg95.yml | 4 ++++ exporter/postgres/queries_pg96.yml | 4 ++++ hugo/content/changelog/_index.md | 1 + hugo/content/exporter/_index.md | 2 ++ 8 files changed, 27 insertions(+) diff --git a/exporter/postgres/queries_pg10.yml b/exporter/postgres/queries_pg10.yml index cfeab6a8..c5c486b7 100644 --- a/exporter/postgres/queries_pg10.yml +++ b/exporter/postgres/queries_pg10.yml @@ -11,6 +11,7 @@ ccp_connection_stats: , idle_in_txn , (select coalesce(extract(epoch from (max(now() - state_change))),0) from pg_catalog.pg_stat_activity where state = 'idle in transaction') as max_idle_in_txn_time , (select coalesce(extract(epoch from (max(now() - query_start))),0) from pg_catalog.pg_stat_activity where backend_type = 'client backend' and state <> 'idle' ) as max_query_time + , (select coalesce(extract(epoch from (max(now() - query_start))),0) from pg_catalog.pg_stat_activity where backend_type = 'client backend' and wait_event_type = 'Lock' ) as max_blocked_query_time , max_connections from ( select count(*) as total @@ -36,6 +37,9 @@ ccp_connection_stats: - max_query_time: usage: "GAUGE" description: "Length of time in seconds of the longest running query" + - max_blocked_query_time: + usage: "GAUGE" + description: "Length of time in seconds of the longest running query that has been blocked by a heavyweight lock" - max_connections: usage: "GAUGE" description: "Value of max_connections for the monitored database" diff --git a/exporter/postgres/queries_pg11.yml b/exporter/postgres/queries_pg11.yml index eb30e9ff..2d2c498d 100644 --- a/exporter/postgres/queries_pg11.yml +++ b/exporter/postgres/queries_pg11.yml @@ -11,6 +11,7 @@ ccp_connection_stats: , idle_in_txn , (select coalesce(extract(epoch from (max(now() - state_change))),0) from pg_catalog.pg_stat_activity where state = 'idle in transaction') as max_idle_in_txn_time , (select coalesce(extract(epoch from (max(now() - query_start))),0) from pg_catalog.pg_stat_activity where backend_type = 'client backend' and state <> 'idle' ) as max_query_time + , (select coalesce(extract(epoch from (max(now() - query_start))),0) from pg_catalog.pg_stat_activity where backend_type = 'client backend' and wait_event_type = 'Lock' ) as max_blocked_query_time , max_connections from ( select count(*) as total @@ -36,6 +37,9 @@ ccp_connection_stats: - max_query_time: usage: "GAUGE" description: "Length of time in seconds of the longest running query" + - max_blocked_query_time: + usage: "GAUGE" + description: "Length of time in seconds of the longest running query that has been blocked by a heavyweight lock" - max_connections: usage: "GAUGE" description: "Value of max_connections for the monitored database" diff --git a/exporter/postgres/queries_pg12.yml b/exporter/postgres/queries_pg12.yml index 14fc568d..b7b7a7c1 100644 --- a/exporter/postgres/queries_pg12.yml +++ b/exporter/postgres/queries_pg12.yml @@ -11,6 +11,7 @@ ccp_connection_stats: , idle_in_txn , (select coalesce(extract(epoch from (max(now() - state_change))),0) from pg_catalog.pg_stat_activity where state = 'idle in transaction') as max_idle_in_txn_time , (select coalesce(extract(epoch from (max(now() - query_start))),0) from pg_catalog.pg_stat_activity where backend_type = 'client backend' and state <> 'idle' ) as max_query_time + , (select coalesce(extract(epoch from (max(now() - query_start))),0) from pg_catalog.pg_stat_activity where backend_type = 'client backend' and wait_event_type = 'Lock' ) as max_blocked_query_time , max_connections from ( select count(*) as total @@ -36,6 +37,9 @@ ccp_connection_stats: - max_query_time: usage: "GAUGE" description: "Length of time in seconds of the longest running query" + - max_blocked_query_time: + usage: "GAUGE" + description: "Length of time in seconds of the longest running query that has been blocked by a heavyweight lock" - max_connections: usage: "GAUGE" description: "Value of max_connections for the monitored database" diff --git a/exporter/postgres/queries_pg13.yml b/exporter/postgres/queries_pg13.yml index 58b4cfa3..2fd80c12 100644 --- a/exporter/postgres/queries_pg13.yml +++ b/exporter/postgres/queries_pg13.yml @@ -11,6 +11,7 @@ ccp_connection_stats: , idle_in_txn , (select coalesce(extract(epoch from (max(now() - state_change))),0) from pg_catalog.pg_stat_activity where state = 'idle in transaction') as max_idle_in_txn_time , (select coalesce(extract(epoch from (max(now() - query_start))),0) from pg_catalog.pg_stat_activity where backend_type = 'client backend' and state <> 'idle' ) as max_query_time + , (select coalesce(extract(epoch from (max(now() - query_start))),0) from pg_catalog.pg_stat_activity where backend_type = 'client backend' and wait_event_type = 'Lock' ) as max_blocked_query_time , max_connections from ( select count(*) as total @@ -36,6 +37,9 @@ ccp_connection_stats: - max_query_time: usage: "GAUGE" description: "Length of time in seconds of the longest running query" + - max_blocked_query_time: + usage: "GAUGE" + description: "Length of time in seconds of the longest running query that has been blocked by a heavyweight lock" - max_connections: usage: "GAUGE" description: "Value of max_connections for the monitored database" diff --git a/exporter/postgres/queries_pg95.yml b/exporter/postgres/queries_pg95.yml index 080f1431..a16d3e9f 100644 --- a/exporter/postgres/queries_pg95.yml +++ b/exporter/postgres/queries_pg95.yml @@ -11,6 +11,7 @@ ccp_connection_stats: , idle_in_txn , (select coalesce(extract(epoch from (max(now() - state_change))),0) from monitor.pg_stat_activity() where state = 'idle in transaction') as max_idle_in_txn_time , (select coalesce(extract(epoch from (max(now() - query_start))),0) from monitor.pg_stat_activity() where state <> 'idle') as max_query_time + , (select coalesce(extract(epoch from (max(now() - query_start))),0) from monitor.pg_stat_activity() where waiting = 't' ) as max_blocked_query_time , max_connections from ( select count(*) as total @@ -36,6 +37,9 @@ ccp_connection_stats: - max_query_time: usage: "GAUGE" description: "Length of time in seconds of the longest running query" + - max_blocked_query_time: + usage: "GAUGE" + description: "Length of time in seconds of the longest running query that has been blocked by a heavyweight lock" - max_connections: usage: "GAUGE" description: "Value of max_connections for the monitored database" diff --git a/exporter/postgres/queries_pg96.yml b/exporter/postgres/queries_pg96.yml index 8149050a..aa629561 100644 --- a/exporter/postgres/queries_pg96.yml +++ b/exporter/postgres/queries_pg96.yml @@ -12,6 +12,7 @@ ccp_connection_stats: , idle_in_txn , (select coalesce(extract(epoch from (max(now() - state_change))),0) from monitor.pg_stat_activity() where state = 'idle in transaction') as max_idle_in_txn_time , (select coalesce(extract(epoch from (max(now() - query_start))),0) from monitor.pg_stat_activity() where state <> 'idle') as max_query_time + , (select coalesce(extract(epoch from (max(now() - query_start))),0) from pg_catalog.pg_stat_activity where backend_type = 'client backend' and wait_event_type = 'Lock' ) as max_blocked_query_time , max_connections from ( select count(*) as total @@ -37,6 +38,9 @@ ccp_connection_stats: - max_query_time: usage: "GAUGE" description: "Length of time in seconds of the longest running query" + - max_blocked_query_time: + usage: "GAUGE" + description: "Length of time in seconds of the longest running query that has been blocked by a heavyweight lock" - max_connections: usage: "GAUGE" description: "Value of max_connections for the monitored database" diff --git a/hugo/content/changelog/_index.md b/hugo/content/changelog/_index.md index f196c5c2..3fd7253c 100644 --- a/hugo/content/changelog/_index.md +++ b/hugo/content/changelog/_index.md @@ -9,6 +9,7 @@ weight: 5 * Added support for PostgreSQL 13 * Added queries and dashboards for pgnodemx/container support + * Added metrics for monitoring longest blocked query time ### Bug Fixes diff --git a/hugo/content/exporter/_index.md b/hugo/content/exporter/_index.md index 4c7d0e8b..fc0b49bb 100644 --- a/hugo/content/exporter/_index.md +++ b/hugo/content/exporter/_index.md @@ -507,6 +507,8 @@ The following metrics either require special considerations when monitoring spec * *ccp_connection_stats_idle_in_txn* - Count of idle in transaction connections + * *ccp_connection_stats_max_blocked_query_time* - Runtime of longest running query that has been blocked by a heavyweight lock + * *ccp_connection_stats_max_connections* - Current value of max_connections for reference * *ccp_connection_stats_max_idle_in_txn_time* - Runtime of longest idle in transaction (IIT) session.