summaryrefslogtreecommitdiffstats
path: root/collectors/python.d.plugin/postgres/postgres.chart.py
diff options
context:
space:
mode:
Diffstat (limited to 'collectors/python.d.plugin/postgres/postgres.chart.py')
-rw-r--r--collectors/python.d.plugin/postgres/postgres.chart.py245
1 files changed, 223 insertions, 22 deletions
diff --git a/collectors/python.d.plugin/postgres/postgres.chart.py b/collectors/python.d.plugin/postgres/postgres.chart.py
index bd28dd9b7..29026a6a3 100644
--- a/collectors/python.d.plugin/postgres/postgres.chart.py
+++ b/collectors/python.d.plugin/postgres/postgres.chart.py
@@ -45,14 +45,18 @@ QUERY_NAME_INDEX_STATS = 'INDEX_STATS'
QUERY_NAME_DATABASE = 'DATABASE'
QUERY_NAME_BGWRITER = 'BGWRITER'
QUERY_NAME_LOCKS = 'LOCKS'
+QUERY_NAME_BLOCKERS = 'BLOCKERS'
QUERY_NAME_DATABASES = 'DATABASES'
QUERY_NAME_STANDBY = 'STANDBY'
QUERY_NAME_REPLICATION_SLOT = 'REPLICATION_SLOT'
QUERY_NAME_STANDBY_DELTA = 'STANDBY_DELTA'
+QUERY_NAME_STANDBY_LAG = 'STANDBY_LAG'
QUERY_NAME_REPSLOT_FILES = 'REPSLOT_FILES'
QUERY_NAME_IF_SUPERUSER = 'IF_SUPERUSER'
QUERY_NAME_SERVER_VERSION = 'SERVER_VERSION'
QUERY_NAME_AUTOVACUUM = 'AUTOVACUUM'
+QUERY_NAME_FORCED_AUTOVACUUM = 'FORCED_AUTOVACUUM'
+QUERY_NAME_TX_WRAPAROUND = 'TX_WRAPAROUND'
QUERY_NAME_DIFF_LSN = 'DIFF_LSN'
QUERY_NAME_WAL_WRITES = 'WAL_WRITES'
@@ -123,6 +127,9 @@ METRICS = {
'ShareLock',
'RowExclusiveLock'
],
+ QUERY_NAME_BLOCKERS: [
+ 'blocking_pids_avg'
+ ],
QUERY_NAME_AUTOVACUUM: [
'analyze',
'vacuum_analyze',
@@ -130,12 +137,24 @@ METRICS = {
'vacuum_freeze',
'brin_summarize'
],
+ QUERY_NAME_FORCED_AUTOVACUUM: [
+ 'percent_towards_forced_vacuum'
+ ],
+ QUERY_NAME_TX_WRAPAROUND: [
+ 'oldest_current_xid',
+ 'percent_towards_wraparound'
+ ],
QUERY_NAME_STANDBY_DELTA: [
'sent_delta',
'write_delta',
'flush_delta',
'replay_delta'
],
+ QUERY_NAME_STANDBY_LAG: [
+ 'write_lag',
+ 'flush_lag',
+ 'replay_lag'
+ ],
QUERY_NAME_REPSLOT_FILES: [
'replslot_wal_keep',
'replslot_files'
@@ -177,7 +196,7 @@ FROM
FROM pg_catalog.pg_ls_dir('pg_wal') AS wal(name)
WHERE name ~ '^[0-9A-F]{24}$'
ORDER BY
- (pg_stat_file('pg_wal/'||name)).modification,
+ (pg_stat_file('pg_wal/'||name, true)).modification,
wal.name DESC) sub;
""",
V96: """
@@ -204,7 +223,7 @@ FROM
FROM pg_catalog.pg_ls_dir('pg_xlog') AS wal(name)
WHERE name ~ '^[0-9A-F]{24}$'
ORDER BY
- (pg_stat_file('pg_xlog/'||name)).modification,
+ (pg_stat_file('pg_xlog/'||name, true)).modification,
wal.name DESC) sub;
""",
}
@@ -263,7 +282,7 @@ FROM (
FROM pg_catalog.pg_stat_activity
WHERE backend_type IN ('client backend', 'background worker')
UNION ALL
- SELECT 'r', COUNT(1)
+ SELECT 'r', COUNT(1)
FROM pg_catalog.pg_stat_replication
) as s;
""",
@@ -277,7 +296,7 @@ FROM (
FROM pg_catalog.pg_stat_activity
WHERE query NOT LIKE 'autovacuum: %%'
UNION ALL
- SELECT 'r', COUNT(1)
+ SELECT 'r', COUNT(1)
FROM pg_catalog.pg_stat_replication
) as s;
""",
@@ -291,7 +310,7 @@ FROM (
FROM pg_catalog.pg_stat_activity
WHERE current_query NOT LIKE 'autovacuum: %%'
UNION ALL
- SELECT 'r', COUNT(1)
+ SELECT 'r', COUNT(1)
FROM pg_catalog.pg_stat_replication
) as s;
""",
@@ -386,6 +405,48 @@ ORDER BY datname, mode;
""",
}
+QUERY_BLOCKERS = {
+ DEFAULT: """
+WITH B AS (
+SELECT DISTINCT
+ pg_database.datname as database_name,
+ pg_locks.pid,
+ cardinality(pg_blocking_pids(pg_locks.pid)) AS blocking_pids
+FROM pg_locks
+INNER JOIN pg_database ON pg_database.oid = pg_locks.database
+WHERE NOT pg_locks.granted)
+SELECT database_name, AVG(blocking_pids) AS blocking_pids_avg
+FROM B
+GROUP BY database_name
+""",
+ V96: """
+WITH B AS (
+SELECT DISTINCT
+ pg_database.datname as database_name,
+ blocked_locks.pid AS blocked_pid,
+ COUNT(blocking_locks.pid) AS blocking_pids
+FROM pg_catalog.pg_locks blocked_locks
+INNER JOIN pg_database ON pg_database.oid = blocked_locks.database
+JOIN pg_catalog.pg_locks blocking_locks
+ ON blocking_locks.locktype = blocked_locks.locktype
+ AND blocking_locks.database IS NOT DISTINCT FROM blocked_locks.database
+ AND blocking_locks.relation IS NOT DISTINCT FROM blocked_locks.relation
+ AND blocking_locks.page IS NOT DISTINCT FROM blocked_locks.page
+ AND blocking_locks.tuple IS NOT DISTINCT FROM blocked_locks.tuple
+ AND blocking_locks.virtualxid IS NOT DISTINCT FROM blocked_locks.virtualxid
+ AND blocking_locks.transactionid IS NOT DISTINCT FROM blocked_locks.transactionid
+ AND blocking_locks.classid IS NOT DISTINCT FROM blocked_locks.classid
+ AND blocking_locks.objid IS NOT DISTINCT FROM blocked_locks.objid
+ AND blocking_locks.objsubid IS NOT DISTINCT FROM blocked_locks.objsubid
+ AND blocking_locks.pid != blocked_locks.pid
+WHERE NOT blocked_locks.GRANTED
+GROUP BY database_name, blocked_pid)
+SELECT database_name, AVG(blocking_pids) AS blocking_pids_avg
+FROM B
+GROUP BY database_name
+"""
+}
+
QUERY_DATABASES = {
DEFAULT: """
SELECT
@@ -394,17 +455,18 @@ FROM pg_stat_database
WHERE
has_database_privilege(
(SELECT current_user), datname, 'connect')
- AND NOT datname ~* '^template\d';
+ AND NOT datname ~* '^template\d'
+ORDER BY datname;
""",
}
QUERY_STANDBY = {
DEFAULT: """
SELECT
- application_name
-FROM pg_stat_replication
-WHERE application_name IS NOT NULL
-GROUP BY application_name;
+ COALESCE(prs.slot_name, psr.application_name) application_name
+FROM pg_stat_replication psr
+LEFT OUTER JOIN pg_replication_slots prs on psr.pid = prs.active_pid
+WHERE application_name IS NOT NULL;
""",
}
@@ -418,7 +480,7 @@ FROM pg_replication_slots;
QUERY_STANDBY_DELTA = {
DEFAULT: """
SELECT
- application_name,
+ COALESCE(prs.slot_name, psr.application_name) application_name,
pg_wal_lsn_diff(
CASE pg_is_in_recovery()
WHEN true THEN pg_last_wal_receive_lsn()
@@ -443,12 +505,13 @@ SELECT
ELSE pg_current_wal_lsn()
END,
replay_lsn) AS replay_delta
-FROM pg_stat_replication
+FROM pg_stat_replication psr
+LEFT OUTER JOIN pg_replication_slots prs on psr.pid = prs.active_pid
WHERE application_name IS NOT NULL;
""",
V96: """
SELECT
- application_name,
+ COALESCE(prs.slot_name, psr.application_name) application_name,
pg_xlog_location_diff(
CASE pg_is_in_recovery()
WHEN true THEN pg_last_xlog_receive_location()
@@ -473,11 +536,25 @@ SELECT
ELSE pg_current_xlog_location()
END,
replay_location) AS replay_delta
-FROM pg_stat_replication
+FROM pg_stat_replication psr
+LEFT OUTER JOIN pg_replication_slots prs on psr.pid = prs.active_pid
WHERE application_name IS NOT NULL;
""",
}
+QUERY_STANDBY_LAG = {
+ DEFAULT: """
+SELECT
+ COALESCE(prs.slot_name, psr.application_name) application_name,
+ COALESCE(EXTRACT(EPOCH FROM write_lag)::bigint, 0) AS write_lag,
+ COALESCE(EXTRACT(EPOCH FROM flush_lag)::bigint, 0) AS flush_lag,
+ COALESCE(EXTRACT(EPOCH FROM replay_lag)::bigint, 0) AS replay_lag
+FROM pg_stat_replication psr
+LEFT OUTER JOIN pg_replication_slots prs on psr.pid = prs.active_pid
+WHERE application_name IS NOT NULL;
+"""
+}
+
QUERY_REPSLOT_FILES = {
DEFAULT: """
WITH wal_size AS (
@@ -500,8 +577,20 @@ FROM
slot_type,
COALESCE (
floor(
- (pg_wal_lsn_diff(pg_current_wal_lsn (),slot.restart_lsn)
- - (pg_walfile_name_offset (restart_lsn)).file_offset) / (s.val)
+ CASE WHEN pg_is_in_recovery()
+ THEN (
+ pg_wal_lsn_diff(pg_last_wal_receive_lsn(), slot.restart_lsn)
+ -- this is needed to account for whole WAL retention and
+ -- not only size retention
+ + (pg_wal_lsn_diff(restart_lsn, '0/0') %% s.val)
+ ) / s.val
+ ELSE (
+ pg_wal_lsn_diff(pg_current_wal_lsn(), slot.restart_lsn)
+ -- this is needed to account for whole WAL retention and
+ -- not only size retention
+ + (pg_walfile_name_offset(restart_lsn)).file_offset
+ ) / s.val
+ END
),0) AS replslot_wal_keep
FROM pg_replication_slots slot
LEFT JOIN (
@@ -539,8 +628,20 @@ FROM
slot_type,
COALESCE (
floor(
- (pg_wal_lsn_diff(pg_current_wal_lsn (),slot.restart_lsn)
- - (pg_walfile_name_offset (restart_lsn)).file_offset) / (s.val)
+ CASE WHEN pg_is_in_recovery()
+ THEN (
+ pg_wal_lsn_diff(pg_last_wal_receive_lsn(), slot.restart_lsn)
+ -- this is needed to account for whole WAL retention and
+ -- not only size retention
+ + (pg_wal_lsn_diff(restart_lsn, '0/0') %% s.val)
+ ) / s.val
+ ELSE (
+ pg_wal_lsn_diff(pg_current_wal_lsn(), slot.restart_lsn)
+ -- this is needed to account for whole WAL retention and
+ -- not only size retention
+ + (pg_walfile_name_offset(restart_lsn)).file_offset
+ ) / s.val
+ END
),0) AS replslot_wal_keep
FROM pg_replication_slots slot
LEFT JOIN (
@@ -586,6 +687,43 @@ WHERE query NOT LIKE '%%pg_stat_activity%%';
""",
}
+QUERY_FORCED_AUTOVACUUM = {
+ DEFAULT: """
+WITH max_age AS (
+ SELECT setting AS autovacuum_freeze_max_age
+ FROM pg_catalog.pg_settings
+ WHERE name = 'autovacuum_freeze_max_age' )
+, per_database_stats AS (
+ SELECT datname
+ , m.autovacuum_freeze_max_age::int
+ , age(d.datfrozenxid) AS oldest_current_xid
+ FROM pg_catalog.pg_database d
+ JOIN max_age m ON (true)
+ WHERE d.datallowconn )
+SELECT max(ROUND(100*(oldest_current_xid/autovacuum_freeze_max_age::float))) AS percent_towards_forced_autovacuum
+FROM per_database_stats;
+""",
+}
+
+QUERY_TX_WRAPAROUND = {
+ DEFAULT: """
+WITH max_age AS (
+ SELECT 2000000000 as max_old_xid
+ FROM pg_catalog.pg_settings
+ WHERE name = 'autovacuum_freeze_max_age' )
+, per_database_stats AS (
+ SELECT datname
+ , m.max_old_xid::int
+ , age(d.datfrozenxid) AS oldest_current_xid
+ FROM pg_catalog.pg_database d
+ JOIN max_age m ON (true)
+ WHERE d.datallowconn )
+SELECT max(oldest_current_xid) AS oldest_current_xid
+ , max(ROUND(100*(oldest_current_xid/max_old_xid::float))) AS percent_towards_wraparound
+FROM per_database_stats;
+""",
+}
+
QUERY_DIFF_LSN = {
DEFAULT: """
SELECT
@@ -632,6 +770,10 @@ def query_factory(name, version=NO_VERSION):
return QUERY_BGWRITER[DEFAULT]
elif name == QUERY_NAME_LOCKS:
return QUERY_LOCKS[DEFAULT]
+ elif name == QUERY_NAME_BLOCKERS:
+ if version < 90600:
+ return QUERY_BLOCKERS[V96]
+ return QUERY_BLOCKERS[DEFAULT]
elif name == QUERY_NAME_DATABASES:
return QUERY_DATABASES[DEFAULT]
elif name == QUERY_NAME_STANDBY:
@@ -644,6 +786,10 @@ def query_factory(name, version=NO_VERSION):
return QUERY_SHOW_VERSION[DEFAULT]
elif name == QUERY_NAME_AUTOVACUUM:
return QUERY_AUTOVACUUM[DEFAULT]
+ elif name == QUERY_NAME_FORCED_AUTOVACUUM:
+ return QUERY_FORCED_AUTOVACUUM[DEFAULT]
+ elif name == QUERY_NAME_TX_WRAPAROUND:
+ return QUERY_TX_WRAPAROUND[DEFAULT]
elif name == QUERY_NAME_WAL:
if version < 100000:
return QUERY_WAL[V96]
@@ -656,6 +802,8 @@ def query_factory(name, version=NO_VERSION):
if version < 100000:
return QUERY_STANDBY_DELTA[V96]
return QUERY_STANDBY_DELTA[DEFAULT]
+ elif name == QUERY_NAME_STANDBY_LAG:
+ return QUERY_STANDBY_LAG[DEFAULT]
elif name == QUERY_NAME_REPSLOT_FILES:
if version < 110000:
return QUERY_REPSLOT_FILES[V10]
@@ -676,6 +824,7 @@ ORDER = [
'db_stat_tuple_write',
'db_stat_transactions',
'db_stat_connections',
+ 'db_stat_blocking_pids_avg',
'database_size',
'backend_process',
'backend_usage',
@@ -695,7 +844,11 @@ ORDER = [
'stat_bgwriter_maxwritten',
'replication_slot',
'standby_delta',
- 'autovacuum'
+ 'standby_lag',
+ 'autovacuum',
+ 'forced_autovacuum',
+ 'tx_wraparound_oldest_current_xid',
+ 'tx_wraparound_percent_towards_wraparound'
]
CHARTS = {
@@ -752,6 +905,13 @@ CHARTS = {
['temp_files', 'files', 'incremental']
]
},
+ 'db_stat_blocking_pids_avg': {
+ 'options': [None, 'Average number of blocking transactions in db', 'processes', 'db statistics',
+ 'postgres.db_stat_blocking_pids_avg', 'line'],
+ 'lines': [
+ ['blocking_pids_avg', 'blocking', 'absolute']
+ ]
+ },
'database_size': {
'options': [None, 'Database size', 'MiB', 'database size', 'postgres.db_size', 'stacked'],
'lines': [
@@ -875,6 +1035,24 @@ CHARTS = {
['brin_summarize', 'brin summarize', 'absolute']
]
},
+ 'forced_autovacuum': {
+ 'options': [None, 'Percent towards forced autovacuum', 'percent', 'autovacuum', 'postgres.forced_autovacuum', 'line'],
+ 'lines': [
+ ['percent_towards_forced_autovacuum', 'percent', 'absolute']
+ ]
+ },
+ 'tx_wraparound_oldest_current_xid': {
+ 'options': [None, 'Oldest current XID', 'xid', 'tx_wraparound', 'postgres.tx_wraparound_oldest_current_xid', 'line'],
+ 'lines': [
+ ['oldest_current_xid', 'xid', 'absolute']
+ ]
+ },
+ 'tx_wraparound_percent_towards_wraparound': {
+ 'options': [None, 'Percent towards wraparound', 'percent', 'tx_wraparound', 'postgres.percent_towards_wraparound', 'line'],
+ 'lines': [
+ ['percent_towards_wraparound', 'percent', 'absolute']
+ ]
+ },
'standby_delta': {
'options': [None, 'Standby delta', 'KiB', 'replication delta', 'postgres.standby_delta', 'line'],
'lines': [
@@ -884,6 +1062,14 @@ CHARTS = {
['replay_delta', 'replay delta', 'absolute', 1, 1024]
]
},
+ 'standby_lag': {
+ 'options': [None, 'Standby lag', 'seconds', 'replication lag', 'postgres.standby_lag', 'line'],
+ 'lines': [
+ ['write_lag', 'write lag', 'absolute'],
+ ['flush_lag', 'flush lag', 'absolute'],
+ ['replay_lag', 'replay lag', 'absolute']
+ ]
+ },
'replication_slot': {
'options': [None, 'Replication slot files', 'files', 'replication slot', 'postgres.replication_slot', 'line'],
'lines': [
@@ -1073,6 +1259,7 @@ class Service(SimpleService):
self.queries[query_factory(QUERY_NAME_BGWRITER)] = METRICS[QUERY_NAME_BGWRITER]
self.queries[query_factory(QUERY_NAME_DIFF_LSN, self.server_version)] = METRICS[QUERY_NAME_WAL_WRITES]
self.queries[query_factory(QUERY_NAME_STANDBY_DELTA, self.server_version)] = METRICS[QUERY_NAME_STANDBY_DELTA]
+ self.queries[query_factory(QUERY_NAME_BLOCKERS, self.server_version)] = METRICS[QUERY_NAME_BLOCKERS]
if self.do_index_stats:
self.queries[query_factory(QUERY_NAME_INDEX_STATS)] = METRICS[QUERY_NAME_INDEX_STATS]
@@ -1092,6 +1279,12 @@ class Service(SimpleService):
if self.server_version >= 90400:
self.queries[query_factory(QUERY_NAME_AUTOVACUUM)] = METRICS[QUERY_NAME_AUTOVACUUM]
+ self.queries[query_factory(QUERY_NAME_FORCED_AUTOVACUUM)] = METRICS[QUERY_NAME_FORCED_AUTOVACUUM]
+ self.queries[query_factory(QUERY_NAME_TX_WRAPAROUND)] = METRICS[QUERY_NAME_TX_WRAPAROUND]
+
+ if self.server_version >= 100000:
+ self.queries[query_factory(QUERY_NAME_STANDBY_LAG)] = METRICS[QUERY_NAME_STANDBY_LAG]
+
def create_dynamic_charts(self):
for database_name in self.databases[::-1]:
dim = [
@@ -1116,11 +1309,19 @@ class Service(SimpleService):
)
for application_name in self.secondaries[::-1]:
- add_replication_delta_chart(
+ add_replication_standby_chart(
order=self.order,
definitions=self.definitions,
name='standby_delta',
application_name=application_name,
+ chart_family='replication delta',
+ )
+ add_replication_standby_chart(
+ order=self.order,
+ definitions=self.definitions,
+ name='standby_lag',
+ application_name=application_name,
+ chart_family='replication lag',
)
for slot_name in self.replication_slots[::-1]:
@@ -1199,7 +1400,7 @@ def add_database_stat_chart(order, definitions, name, database_name):
'lines': create_lines(database_name, chart_template['lines'])}
-def add_replication_delta_chart(order, definitions, name, application_name):
+def add_replication_standby_chart(order, definitions, name, application_name, chart_family):
def create_lines(standby, lines):
result = list()
for line in lines:
@@ -1213,7 +1414,7 @@ def add_replication_delta_chart(order, definitions, name, application_name):
order.insert(position, chart_name)
name, title, units, _, context, chart_type = chart_template['options']
definitions[chart_name] = {
- 'options': [name, title + ': ' + application_name, units, 'replication delta', context, chart_type],
+ 'options': [name, title + ': ' + application_name, units, chart_family, context, chart_type],
'lines': create_lines(application_name, chart_template['lines'])}