From 483926a283e118590da3f9ecfa75a8a4d62143ce Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 1 Dec 2021 07:15:11 +0100 Subject: Merging upstream version 1.32.0. Signed-off-by: Daniel Baumann --- collectors/python.d.plugin/postgres/README.md | 19 ++ .../python.d.plugin/postgres/postgres.chart.py | 245 +++++++++++++++++++-- collectors/python.d.plugin/postgres/postgres.conf | 9 +- 3 files changed, 243 insertions(+), 30 deletions(-) (limited to 'collectors/python.d.plugin/postgres') diff --git a/collectors/python.d.plugin/postgres/README.md b/collectors/python.d.plugin/postgres/README.md index dc9b18467..0515ec57c 100644 --- a/collectors/python.d.plugin/postgres/README.md +++ b/collectors/python.d.plugin/postgres/README.md @@ -12,6 +12,8 @@ Collects database health and performance metrics. - `python-psycopg2` package. You have to install it manually and make sure that it is available to the `netdata` user, either using `pip`, the package manager of your Linux distribution, or any other method you prefer. +- PostgreSQL v9.4+ + Following charts are drawn: 1. **Database size** MB @@ -68,6 +70,23 @@ Following charts are drawn: - locks +12. **Standby delta** KB + + - sent delta + - write delta + - flush delta + - replay delta + +13. **Standby lag** seconds + + - write lag + - flush lag + - replay lag + +14. **Average number of blocking transactions in db** processes + + - blocking + ## Configuration Edit the `python.d/postgres.conf` configuration file using `edit-config` from the Netdata [config diff --git a/collectors/python.d.plugin/postgres/postgres.chart.py b/collectors/python.d.plugin/postgres/postgres.chart.py index bd28dd9b7..29026a6a3 100644 --- a/collectors/python.d.plugin/postgres/postgres.chart.py +++ b/collectors/python.d.plugin/postgres/postgres.chart.py @@ -45,14 +45,18 @@ QUERY_NAME_INDEX_STATS = 'INDEX_STATS' QUERY_NAME_DATABASE = 'DATABASE' QUERY_NAME_BGWRITER = 'BGWRITER' QUERY_NAME_LOCKS = 'LOCKS' +QUERY_NAME_BLOCKERS = 'BLOCKERS' QUERY_NAME_DATABASES = 'DATABASES' QUERY_NAME_STANDBY = 'STANDBY' QUERY_NAME_REPLICATION_SLOT = 'REPLICATION_SLOT' QUERY_NAME_STANDBY_DELTA = 'STANDBY_DELTA' +QUERY_NAME_STANDBY_LAG = 'STANDBY_LAG' QUERY_NAME_REPSLOT_FILES = 'REPSLOT_FILES' QUERY_NAME_IF_SUPERUSER = 'IF_SUPERUSER' QUERY_NAME_SERVER_VERSION = 'SERVER_VERSION' QUERY_NAME_AUTOVACUUM = 'AUTOVACUUM' +QUERY_NAME_FORCED_AUTOVACUUM = 'FORCED_AUTOVACUUM' +QUERY_NAME_TX_WRAPAROUND = 'TX_WRAPAROUND' QUERY_NAME_DIFF_LSN = 'DIFF_LSN' QUERY_NAME_WAL_WRITES = 'WAL_WRITES' @@ -123,6 +127,9 @@ METRICS = { 'ShareLock', 'RowExclusiveLock' ], + QUERY_NAME_BLOCKERS: [ + 'blocking_pids_avg' + ], QUERY_NAME_AUTOVACUUM: [ 'analyze', 'vacuum_analyze', @@ -130,12 +137,24 @@ METRICS = { 'vacuum_freeze', 'brin_summarize' ], + QUERY_NAME_FORCED_AUTOVACUUM: [ + 'percent_towards_forced_vacuum' + ], + QUERY_NAME_TX_WRAPAROUND: [ + 'oldest_current_xid', + 'percent_towards_wraparound' + ], QUERY_NAME_STANDBY_DELTA: [ 'sent_delta', 'write_delta', 'flush_delta', 'replay_delta' ], + QUERY_NAME_STANDBY_LAG: [ + 'write_lag', + 'flush_lag', + 'replay_lag' + ], QUERY_NAME_REPSLOT_FILES: [ 'replslot_wal_keep', 'replslot_files' @@ -177,7 +196,7 @@ FROM FROM pg_catalog.pg_ls_dir('pg_wal') AS wal(name) WHERE name ~ '^[0-9A-F]{24}$' ORDER BY - (pg_stat_file('pg_wal/'||name)).modification, + (pg_stat_file('pg_wal/'||name, true)).modification, wal.name DESC) sub; """, V96: """ @@ -204,7 +223,7 @@ FROM FROM pg_catalog.pg_ls_dir('pg_xlog') AS wal(name) WHERE name ~ '^[0-9A-F]{24}$' ORDER BY - (pg_stat_file('pg_xlog/'||name)).modification, + (pg_stat_file('pg_xlog/'||name, true)).modification, wal.name DESC) sub; """, } @@ -263,7 +282,7 @@ FROM ( FROM pg_catalog.pg_stat_activity WHERE backend_type IN ('client backend', 'background worker') UNION ALL - SELECT 'r', COUNT(1) + SELECT 'r', COUNT(1) FROM pg_catalog.pg_stat_replication ) as s; """, @@ -277,7 +296,7 @@ FROM ( FROM pg_catalog.pg_stat_activity WHERE query NOT LIKE 'autovacuum: %%' UNION ALL - SELECT 'r', COUNT(1) + SELECT 'r', COUNT(1) FROM pg_catalog.pg_stat_replication ) as s; """, @@ -291,7 +310,7 @@ FROM ( FROM pg_catalog.pg_stat_activity WHERE current_query NOT LIKE 'autovacuum: %%' UNION ALL - SELECT 'r', COUNT(1) + SELECT 'r', COUNT(1) FROM pg_catalog.pg_stat_replication ) as s; """, @@ -386,6 +405,48 @@ ORDER BY datname, mode; """, } +QUERY_BLOCKERS = { + DEFAULT: """ +WITH B AS ( +SELECT DISTINCT + pg_database.datname as database_name, + pg_locks.pid, + cardinality(pg_blocking_pids(pg_locks.pid)) AS blocking_pids +FROM pg_locks +INNER JOIN pg_database ON pg_database.oid = pg_locks.database +WHERE NOT pg_locks.granted) +SELECT database_name, AVG(blocking_pids) AS blocking_pids_avg +FROM B +GROUP BY database_name +""", + V96: """ +WITH B AS ( +SELECT DISTINCT + pg_database.datname as database_name, + blocked_locks.pid AS blocked_pid, + COUNT(blocking_locks.pid) AS blocking_pids +FROM pg_catalog.pg_locks blocked_locks +INNER JOIN pg_database ON pg_database.oid = blocked_locks.database +JOIN pg_catalog.pg_locks blocking_locks + ON blocking_locks.locktype = blocked_locks.locktype + AND blocking_locks.database IS NOT DISTINCT FROM blocked_locks.database + AND blocking_locks.relation IS NOT DISTINCT FROM blocked_locks.relation + AND blocking_locks.page IS NOT DISTINCT FROM blocked_locks.page + AND blocking_locks.tuple IS NOT DISTINCT FROM blocked_locks.tuple + AND blocking_locks.virtualxid IS NOT DISTINCT FROM blocked_locks.virtualxid + AND blocking_locks.transactionid IS NOT DISTINCT FROM blocked_locks.transactionid + AND blocking_locks.classid IS NOT DISTINCT FROM blocked_locks.classid + AND blocking_locks.objid IS NOT DISTINCT FROM blocked_locks.objid + AND blocking_locks.objsubid IS NOT DISTINCT FROM blocked_locks.objsubid + AND blocking_locks.pid != blocked_locks.pid +WHERE NOT blocked_locks.GRANTED +GROUP BY database_name, blocked_pid) +SELECT database_name, AVG(blocking_pids) AS blocking_pids_avg +FROM B +GROUP BY database_name +""" +} + QUERY_DATABASES = { DEFAULT: """ SELECT @@ -394,17 +455,18 @@ FROM pg_stat_database WHERE has_database_privilege( (SELECT current_user), datname, 'connect') - AND NOT datname ~* '^template\d'; + AND NOT datname ~* '^template\d' +ORDER BY datname; """, } QUERY_STANDBY = { DEFAULT: """ SELECT - application_name -FROM pg_stat_replication -WHERE application_name IS NOT NULL -GROUP BY application_name; + COALESCE(prs.slot_name, psr.application_name) application_name +FROM pg_stat_replication psr +LEFT OUTER JOIN pg_replication_slots prs on psr.pid = prs.active_pid +WHERE application_name IS NOT NULL; """, } @@ -418,7 +480,7 @@ FROM pg_replication_slots; QUERY_STANDBY_DELTA = { DEFAULT: """ SELECT - application_name, + COALESCE(prs.slot_name, psr.application_name) application_name, pg_wal_lsn_diff( CASE pg_is_in_recovery() WHEN true THEN pg_last_wal_receive_lsn() @@ -443,12 +505,13 @@ SELECT ELSE pg_current_wal_lsn() END, replay_lsn) AS replay_delta -FROM pg_stat_replication +FROM pg_stat_replication psr +LEFT OUTER JOIN pg_replication_slots prs on psr.pid = prs.active_pid WHERE application_name IS NOT NULL; """, V96: """ SELECT - application_name, + COALESCE(prs.slot_name, psr.application_name) application_name, pg_xlog_location_diff( CASE pg_is_in_recovery() WHEN true THEN pg_last_xlog_receive_location() @@ -473,11 +536,25 @@ SELECT ELSE pg_current_xlog_location() END, replay_location) AS replay_delta -FROM pg_stat_replication +FROM pg_stat_replication psr +LEFT OUTER JOIN pg_replication_slots prs on psr.pid = prs.active_pid WHERE application_name IS NOT NULL; """, } +QUERY_STANDBY_LAG = { + DEFAULT: """ +SELECT + COALESCE(prs.slot_name, psr.application_name) application_name, + COALESCE(EXTRACT(EPOCH FROM write_lag)::bigint, 0) AS write_lag, + COALESCE(EXTRACT(EPOCH FROM flush_lag)::bigint, 0) AS flush_lag, + COALESCE(EXTRACT(EPOCH FROM replay_lag)::bigint, 0) AS replay_lag +FROM pg_stat_replication psr +LEFT OUTER JOIN pg_replication_slots prs on psr.pid = prs.active_pid +WHERE application_name IS NOT NULL; +""" +} + QUERY_REPSLOT_FILES = { DEFAULT: """ WITH wal_size AS ( @@ -500,8 +577,20 @@ FROM slot_type, COALESCE ( floor( - (pg_wal_lsn_diff(pg_current_wal_lsn (),slot.restart_lsn) - - (pg_walfile_name_offset (restart_lsn)).file_offset) / (s.val) + CASE WHEN pg_is_in_recovery() + THEN ( + pg_wal_lsn_diff(pg_last_wal_receive_lsn(), slot.restart_lsn) + -- this is needed to account for whole WAL retention and + -- not only size retention + + (pg_wal_lsn_diff(restart_lsn, '0/0') %% s.val) + ) / s.val + ELSE ( + pg_wal_lsn_diff(pg_current_wal_lsn(), slot.restart_lsn) + -- this is needed to account for whole WAL retention and + -- not only size retention + + (pg_walfile_name_offset(restart_lsn)).file_offset + ) / s.val + END ),0) AS replslot_wal_keep FROM pg_replication_slots slot LEFT JOIN ( @@ -539,8 +628,20 @@ FROM slot_type, COALESCE ( floor( - (pg_wal_lsn_diff(pg_current_wal_lsn (),slot.restart_lsn) - - (pg_walfile_name_offset (restart_lsn)).file_offset) / (s.val) + CASE WHEN pg_is_in_recovery() + THEN ( + pg_wal_lsn_diff(pg_last_wal_receive_lsn(), slot.restart_lsn) + -- this is needed to account for whole WAL retention and + -- not only size retention + + (pg_wal_lsn_diff(restart_lsn, '0/0') %% s.val) + ) / s.val + ELSE ( + pg_wal_lsn_diff(pg_current_wal_lsn(), slot.restart_lsn) + -- this is needed to account for whole WAL retention and + -- not only size retention + + (pg_walfile_name_offset(restart_lsn)).file_offset + ) / s.val + END ),0) AS replslot_wal_keep FROM pg_replication_slots slot LEFT JOIN ( @@ -586,6 +687,43 @@ WHERE query NOT LIKE '%%pg_stat_activity%%'; """, } +QUERY_FORCED_AUTOVACUUM = { + DEFAULT: """ +WITH max_age AS ( + SELECT setting AS autovacuum_freeze_max_age + FROM pg_catalog.pg_settings + WHERE name = 'autovacuum_freeze_max_age' ) +, per_database_stats AS ( + SELECT datname + , m.autovacuum_freeze_max_age::int + , age(d.datfrozenxid) AS oldest_current_xid + FROM pg_catalog.pg_database d + JOIN max_age m ON (true) + WHERE d.datallowconn ) +SELECT max(ROUND(100*(oldest_current_xid/autovacuum_freeze_max_age::float))) AS percent_towards_forced_autovacuum +FROM per_database_stats; +""", +} + +QUERY_TX_WRAPAROUND = { + DEFAULT: """ +WITH max_age AS ( + SELECT 2000000000 as max_old_xid + FROM pg_catalog.pg_settings + WHERE name = 'autovacuum_freeze_max_age' ) +, per_database_stats AS ( + SELECT datname + , m.max_old_xid::int + , age(d.datfrozenxid) AS oldest_current_xid + FROM pg_catalog.pg_database d + JOIN max_age m ON (true) + WHERE d.datallowconn ) +SELECT max(oldest_current_xid) AS oldest_current_xid + , max(ROUND(100*(oldest_current_xid/max_old_xid::float))) AS percent_towards_wraparound +FROM per_database_stats; +""", +} + QUERY_DIFF_LSN = { DEFAULT: """ SELECT @@ -632,6 +770,10 @@ def query_factory(name, version=NO_VERSION): return QUERY_BGWRITER[DEFAULT] elif name == QUERY_NAME_LOCKS: return QUERY_LOCKS[DEFAULT] + elif name == QUERY_NAME_BLOCKERS: + if version < 90600: + return QUERY_BLOCKERS[V96] + return QUERY_BLOCKERS[DEFAULT] elif name == QUERY_NAME_DATABASES: return QUERY_DATABASES[DEFAULT] elif name == QUERY_NAME_STANDBY: @@ -644,6 +786,10 @@ def query_factory(name, version=NO_VERSION): return QUERY_SHOW_VERSION[DEFAULT] elif name == QUERY_NAME_AUTOVACUUM: return QUERY_AUTOVACUUM[DEFAULT] + elif name == QUERY_NAME_FORCED_AUTOVACUUM: + return QUERY_FORCED_AUTOVACUUM[DEFAULT] + elif name == QUERY_NAME_TX_WRAPAROUND: + return QUERY_TX_WRAPAROUND[DEFAULT] elif name == QUERY_NAME_WAL: if version < 100000: return QUERY_WAL[V96] @@ -656,6 +802,8 @@ def query_factory(name, version=NO_VERSION): if version < 100000: return QUERY_STANDBY_DELTA[V96] return QUERY_STANDBY_DELTA[DEFAULT] + elif name == QUERY_NAME_STANDBY_LAG: + return QUERY_STANDBY_LAG[DEFAULT] elif name == QUERY_NAME_REPSLOT_FILES: if version < 110000: return QUERY_REPSLOT_FILES[V10] @@ -676,6 +824,7 @@ ORDER = [ 'db_stat_tuple_write', 'db_stat_transactions', 'db_stat_connections', + 'db_stat_blocking_pids_avg', 'database_size', 'backend_process', 'backend_usage', @@ -695,7 +844,11 @@ ORDER = [ 'stat_bgwriter_maxwritten', 'replication_slot', 'standby_delta', - 'autovacuum' + 'standby_lag', + 'autovacuum', + 'forced_autovacuum', + 'tx_wraparound_oldest_current_xid', + 'tx_wraparound_percent_towards_wraparound' ] CHARTS = { @@ -752,6 +905,13 @@ CHARTS = { ['temp_files', 'files', 'incremental'] ] }, + 'db_stat_blocking_pids_avg': { + 'options': [None, 'Average number of blocking transactions in db', 'processes', 'db statistics', + 'postgres.db_stat_blocking_pids_avg', 'line'], + 'lines': [ + ['blocking_pids_avg', 'blocking', 'absolute'] + ] + }, 'database_size': { 'options': [None, 'Database size', 'MiB', 'database size', 'postgres.db_size', 'stacked'], 'lines': [ @@ -875,6 +1035,24 @@ CHARTS = { ['brin_summarize', 'brin summarize', 'absolute'] ] }, + 'forced_autovacuum': { + 'options': [None, 'Percent towards forced autovacuum', 'percent', 'autovacuum', 'postgres.forced_autovacuum', 'line'], + 'lines': [ + ['percent_towards_forced_autovacuum', 'percent', 'absolute'] + ] + }, + 'tx_wraparound_oldest_current_xid': { + 'options': [None, 'Oldest current XID', 'xid', 'tx_wraparound', 'postgres.tx_wraparound_oldest_current_xid', 'line'], + 'lines': [ + ['oldest_current_xid', 'xid', 'absolute'] + ] + }, + 'tx_wraparound_percent_towards_wraparound': { + 'options': [None, 'Percent towards wraparound', 'percent', 'tx_wraparound', 'postgres.percent_towards_wraparound', 'line'], + 'lines': [ + ['percent_towards_wraparound', 'percent', 'absolute'] + ] + }, 'standby_delta': { 'options': [None, 'Standby delta', 'KiB', 'replication delta', 'postgres.standby_delta', 'line'], 'lines': [ @@ -884,6 +1062,14 @@ CHARTS = { ['replay_delta', 'replay delta', 'absolute', 1, 1024] ] }, + 'standby_lag': { + 'options': [None, 'Standby lag', 'seconds', 'replication lag', 'postgres.standby_lag', 'line'], + 'lines': [ + ['write_lag', 'write lag', 'absolute'], + ['flush_lag', 'flush lag', 'absolute'], + ['replay_lag', 'replay lag', 'absolute'] + ] + }, 'replication_slot': { 'options': [None, 'Replication slot files', 'files', 'replication slot', 'postgres.replication_slot', 'line'], 'lines': [ @@ -1073,6 +1259,7 @@ class Service(SimpleService): self.queries[query_factory(QUERY_NAME_BGWRITER)] = METRICS[QUERY_NAME_BGWRITER] self.queries[query_factory(QUERY_NAME_DIFF_LSN, self.server_version)] = METRICS[QUERY_NAME_WAL_WRITES] self.queries[query_factory(QUERY_NAME_STANDBY_DELTA, self.server_version)] = METRICS[QUERY_NAME_STANDBY_DELTA] + self.queries[query_factory(QUERY_NAME_BLOCKERS, self.server_version)] = METRICS[QUERY_NAME_BLOCKERS] if self.do_index_stats: self.queries[query_factory(QUERY_NAME_INDEX_STATS)] = METRICS[QUERY_NAME_INDEX_STATS] @@ -1092,6 +1279,12 @@ class Service(SimpleService): if self.server_version >= 90400: self.queries[query_factory(QUERY_NAME_AUTOVACUUM)] = METRICS[QUERY_NAME_AUTOVACUUM] + self.queries[query_factory(QUERY_NAME_FORCED_AUTOVACUUM)] = METRICS[QUERY_NAME_FORCED_AUTOVACUUM] + self.queries[query_factory(QUERY_NAME_TX_WRAPAROUND)] = METRICS[QUERY_NAME_TX_WRAPAROUND] + + if self.server_version >= 100000: + self.queries[query_factory(QUERY_NAME_STANDBY_LAG)] = METRICS[QUERY_NAME_STANDBY_LAG] + def create_dynamic_charts(self): for database_name in self.databases[::-1]: dim = [ @@ -1116,11 +1309,19 @@ class Service(SimpleService): ) for application_name in self.secondaries[::-1]: - add_replication_delta_chart( + add_replication_standby_chart( order=self.order, definitions=self.definitions, name='standby_delta', application_name=application_name, + chart_family='replication delta', + ) + add_replication_standby_chart( + order=self.order, + definitions=self.definitions, + name='standby_lag', + application_name=application_name, + chart_family='replication lag', ) for slot_name in self.replication_slots[::-1]: @@ -1199,7 +1400,7 @@ def add_database_stat_chart(order, definitions, name, database_name): 'lines': create_lines(database_name, chart_template['lines'])} -def add_replication_delta_chart(order, definitions, name, application_name): +def add_replication_standby_chart(order, definitions, name, application_name, chart_family): def create_lines(standby, lines): result = list() for line in lines: @@ -1213,7 +1414,7 @@ def add_replication_delta_chart(order, definitions, name, application_name): order.insert(position, chart_name) name, title, units, _, context, chart_type = chart_template['options'] definitions[chart_name] = { - 'options': [name, title + ': ' + application_name, units, 'replication delta', context, chart_type], + 'options': [name, title + ': ' + application_name, units, chart_family, context, chart_type], 'lines': create_lines(application_name, chart_template['lines'])} diff --git a/collectors/python.d.plugin/postgres/postgres.conf b/collectors/python.d.plugin/postgres/postgres.conf index 1970a7a27..7e354d99b 100644 --- a/collectors/python.d.plugin/postgres/postgres.conf +++ b/collectors/python.d.plugin/postgres/postgres.conf @@ -97,14 +97,7 @@ # the client (Netdata) is not considered local, unless it runs from inside # the same container. # -# Postgres supported versions are : -# - 9.3 (without autovacuum) -# - 9.4 -# - 9.5 -# - 9.6 -# - 10 -# -# Superuser access is needed for theses charts: +# Superuser access is needed for these charts: # Write-Ahead Logs # Archive Write-Ahead Logs # -- cgit v1.2.3