From d52f09925a44a67493bd8c598a42c7455e182d5e Mon Sep 17 00:00:00 2001 From: Alexey Lesovsky Date: Thu, 2 Sep 2021 10:53:16 +0500 Subject: [PATCH] Refactor metrics based on pg_stat_wal: - return the metric about written WAL based on pg_current_wal_lsn(); - make a separate metric based on pg_stat_wal.wal_bytes; - count WAL FPIs as is (not in bytes), because FPIs might be compressed and their size is unknown; - remove the word 'written' from some metric names, because it is misleading. --- internal/collector/postgres_wal.go | 52 ++++++++++++++----------- internal/collector/postgres_wal_test.go | 19 ++++----- 2 files changed, 40 insertions(+), 31 deletions(-) diff --git a/internal/collector/postgres_wal.go b/internal/collector/postgres_wal.go index a21f034..7604976 100644 --- a/internal/collector/postgres_wal.go +++ b/internal/collector/postgres_wal.go @@ -10,29 +10,29 @@ import ( const ( postgresWalQuery96 = "SELECT pg_is_in_recovery()::int AS recovery, " + - "(case pg_is_in_recovery() when 't' then pg_last_xlog_receive_location() else pg_current_xlog_location() end) - '0/00000000' AS wal_bytes" + "(case pg_is_in_recovery() when 't' then pg_last_xlog_receive_location() else pg_current_xlog_location() end) - '0/00000000' AS wal_written" postgresWalQuery13 = "SELECT pg_is_in_recovery()::int AS recovery, " + - "(case pg_is_in_recovery() when 't' then pg_last_wal_receive_lsn() else pg_current_wal_lsn() end) - '0/00000000' AS wal_bytes" + "(case pg_is_in_recovery() when 't' then pg_last_wal_receive_lsn() else pg_current_wal_lsn() end) - '0/00000000' AS wal_written" postgresWalQueryLatest = "SELECT pg_is_in_recovery()::int AS recovery, wal_records, wal_fpi, " + - "(case pg_is_in_recovery() when 't' then pg_last_wal_receive_lsn() - '0/00000000' else wal_bytes end) AS wal_bytes, " + - "wal_buffers_full, wal_write, wal_sync, wal_write_time, wal_sync_time, " + - "extract('epoch' from stats_reset) as reset_time " + + "(case pg_is_in_recovery() when 't' then pg_last_wal_receive_lsn() - 
'0/00000000' else pg_current_wal_lsn() - '0/00000000' end) AS wal_written, " + + "wal_bytes, wal_buffers_full, wal_write, wal_sync, wal_write_time, wal_sync_time, extract('epoch' from stats_reset) as reset_time " + "FROM pg_stat_wal" ) type postgresWalCollector struct { - recovery typedDesc - records typedDesc - writtenAllBytes typedDesc - writtenBytes typedDesc - buffersFull typedDesc - writes typedDesc - syncs typedDesc - secondsAll typedDesc - seconds typedDesc - resetUnix typedDesc + recovery typedDesc + records typedDesc + fpi typedDesc + bytes typedDesc + writtenBytes typedDesc // based on pg_current_wal_lsn() + buffersFull typedDesc + writes typedDesc + syncs typedDesc + secondsAll typedDesc + seconds typedDesc + resetUnix typedDesc } // NewPostgresWalCollector returns a new Collector exposing postgres WAL stats. @@ -46,21 +46,27 @@ func NewPostgresWalCollector(constLabels labels, settings model.CollectorSetting settings.Filters, ), records: newBuiltinTypedDesc( - descOpts{"postgres", "wal", "written_records_total", "Total amount of WAL records written (zero in case of standby).", 0}, + descOpts{"postgres", "wal", "records_total", "Total number of WAL records generated (zero in case of standby).", 0}, prometheus.CounterValue, nil, constLabels, settings.Filters, ), - writtenAllBytes: newBuiltinTypedDesc( - descOpts{"postgres", "wal", "written_bytes_all_total", "Total amount of WAL written (or received in case of standby), in bytes.", 0}, + fpi: newBuiltinTypedDesc( + descOpts{"postgres", "wal", "fpi_total", "Total number of WAL full page images generated (zero in case of standby).", 0}, + prometheus.CounterValue, + nil, constLabels, + settings.Filters, + ), + bytes: newBuiltinTypedDesc( + descOpts{"postgres", "wal", "bytes_total", "Total amount of WAL generated (zero in case of standby) since last stats reset, in bytes.", 0}, prometheus.CounterValue, nil, constLabels, settings.Filters, ), writtenBytes: newBuiltinTypedDesc( - descOpts{"postgres", "wal", 
"written_bytes_total", "Total amount of WAL written by each type of WAL (zero in case of standby), in bytes.", 0}, + descOpts{"postgres", "wal", "written_bytes_total", "Total amount of WAL written (or received in case of standby) since cluster init, in bytes.", 0}, prometheus.CounterValue, - []string{"wal"}, constLabels, + nil, constLabels, settings.Filters, ), buffersFull: newBuiltinTypedDesc( @@ -125,9 +131,11 @@ func (c *postgresWalCollector) Update(config Config, ch chan<- prometheus.Metric case "wal_records": ch <- c.records.newConstMetric(v) case "wal_fpi": - ch <- c.writtenBytes.newConstMetric(v*float64(config.walBlockSize), "fpi") + ch <- c.fpi.newConstMetric(v) case "wal_bytes": - ch <- c.writtenAllBytes.newConstMetric(v) + ch <- c.bytes.newConstMetric(v) + case "wal_written": + ch <- c.writtenBytes.newConstMetric(v) case "wal_buffers_full": ch <- c.buffersFull.newConstMetric(v) case "wal_write": diff --git a/internal/collector/postgres_wal_test.go b/internal/collector/postgres_wal_test.go index f5bd76f..e5d39b1 100644 --- a/internal/collector/postgres_wal_test.go +++ b/internal/collector/postgres_wal_test.go @@ -12,13 +12,14 @@ func TestPostgresWalCollector_Update(t *testing.T) { var input = pipelineInput{ required: []string{ "postgres_recovery_info", - "postgres_wal_written_bytes_all_total", + "postgres_wal_written_bytes_total", }, // TODO: wait until Postgres 14 has been released, update Postgres version on pgscv-testing docker image // and move these metrics to 'required' slice. 
optional: []string{ - "postgres_wal_written_records_total", - "postgres_wal_written_bytes_total", + "postgres_wal_records_total", + "postgres_wal_fpi_total", + "postgres_wal_bytes_total", "postgres_wal_buffers_full_total", "postgres_wal_write_total", "postgres_wal_sync_total", @@ -43,17 +44,17 @@ func Test_parsePostgresWalStats(t *testing.T) { name: "pg14", res: &model.PGResult{ Nrows: 1, - Ncols: 10, + Ncols: 11, Colnames: []pgproto3.FieldDescription{ {Name: []byte("recovery")}, - {Name: []byte("wal_records")}, {Name: []byte("wal_fpi")}, {Name: []byte("wal_bytes")}, + {Name: []byte("wal_records")}, {Name: []byte("wal_fpi")}, {Name: []byte("wal_bytes")}, {Name: []byte("wal_written")}, {Name: []byte("wal_buffers_full")}, {Name: []byte("wal_write")}, {Name: []byte("wal_sync")}, {Name: []byte("wal_write_time")}, {Name: []byte("wal_sync_time")}, {Name: []byte("reset_time")}, }, Rows: [][]sql.NullString{ { {String: "0", Valid: true}, - {String: "58452", Valid: true}, {String: "4712", Valid: true}, {String: "587241", Valid: true}, + {String: "58452", Valid: true}, {String: "4712", Valid: true}, {String: "587241", Valid: true}, {String: "8746951", Valid: true}, {String: "1234", Valid: true}, {String: "48541", Valid: true}, {String: "8541", Valid: true}, {String: "874215", Valid: true}, {String: "48736", Valid: true}, {String: "123456789", Valid: true}, }, @@ -61,7 +62,7 @@ func Test_parsePostgresWalStats(t *testing.T) { }, want: map[string]float64{ "recovery": 0, - "wal_records": 58452, "wal_fpi": 4712, "wal_bytes": 587241, + "wal_records": 58452, "wal_fpi": 4712, "wal_bytes": 587241, "wal_written": 8746951, "wal_buffers_full": 1234, "wal_write": 48541, "wal_sync": 8541, "wal_write_time": 874215, "wal_sync_time": 48736, "wal_all_time": 922951, "reset_time": 123456789, }, @@ -72,11 +73,11 @@ func Test_parsePostgresWalStats(t *testing.T) { Nrows: 1, Ncols: 2, Colnames: []pgproto3.FieldDescription{ - {Name: []byte("recovery")}, {Name: []byte("wal_bytes")}, + {Name: 
[]byte("recovery")}, {Name: []byte("wal_written")}, }, Rows: [][]sql.NullString{{{String: "0", Valid: true}, {String: "123456789", Valid: true}}}, }, - want: map[string]float64{"recovery": 0, "wal_bytes": 123456789}, + want: map[string]float64{"recovery": 0, "wal_written": 123456789}, }, }