summary refs log tree commit diff stats
path: root/collectors/python.d.plugin
diff options
context:
space:
mode:
Diffstat (limited to 'collectors/python.d.plugin')
-rw-r--r-- collectors/python.d.plugin/anomalies/README.md | 4
-rw-r--r-- collectors/python.d.plugin/anomalies/anomalies.chart.py | 4
-rw-r--r-- collectors/python.d.plugin/anomalies/anomalies.conf | 4
-rw-r--r-- collectors/python.d.plugin/changefinder/README.md | 12
-rw-r--r-- collectors/python.d.plugin/elasticsearch/elasticsearch.chart.py | 38
-rw-r--r-- collectors/python.d.plugin/go_expvar/go_expvar.chart.py | 2
-rw-r--r-- collectors/python.d.plugin/httpcheck/README.md | 2
-rw-r--r-- collectors/python.d.plugin/mongodb/mongodb.chart.py | 4
-rw-r--r-- collectors/python.d.plugin/mysql/README.md | 2
-rw-r--r-- collectors/python.d.plugin/mysql/mysql.chart.py | 4
-rw-r--r-- collectors/python.d.plugin/postgres/README.md | 19
-rw-r--r-- collectors/python.d.plugin/postgres/postgres.chart.py | 245
-rw-r--r-- collectors/python.d.plugin/postgres/postgres.conf | 9
-rw-r--r-- collectors/python.d.plugin/python.d.plugin.in | 43
-rw-r--r-- collectors/python.d.plugin/smartd_log/smartd_log.chart.py | 2
-rw-r--r-- collectors/python.d.plugin/varnish/varnish.chart.py | 2
-rw-r--r-- collectors/python.d.plugin/zscores/README.md | 12
-rw-r--r-- collectors/python.d.plugin/zscores/zscores.chart.py | 4
-rw-r--r-- collectors/python.d.plugin/zscores/zscores.conf | 6
19 files changed, 334 insertions, 84 deletions
diff --git a/collectors/python.d.plugin/anomalies/README.md b/collectors/python.d.plugin/anomalies/README.md
index 9d24e8685..c58c858bf 100644
--- a/collectors/python.d.plugin/anomalies/README.md
+++ b/collectors/python.d.plugin/anomalies/README.md
@@ -82,8 +82,8 @@ The default configuration should look something like this. Here you can see each
# JOBS (data collection sources)
# Pull data from local Netdata node.
-local:
- name: 'local'
+anomalies:
+ name: 'Anomalies'
# Host to pull data from.
host: '127.0.0.1:19999'
diff --git a/collectors/python.d.plugin/anomalies/anomalies.chart.py b/collectors/python.d.plugin/anomalies/anomalies.chart.py
index 61b51d9c0..8ca3df682 100644
--- a/collectors/python.d.plugin/anomalies/anomalies.chart.py
+++ b/collectors/python.d.plugin/anomalies/anomalies.chart.py
@@ -188,7 +188,7 @@ class Service(SimpleService):
self.custom_model_scalers[model] = MinMaxScaler()
def reinitialize(self):
- """Reinitialize charts, models and data to a begining state.
+ """Reinitialize charts, models and data to a beginning state.
"""
self.charts_init()
self.custom_models_init()
@@ -385,7 +385,7 @@ class Service(SimpleService):
def get_data(self):
- # initialize to whats available right now
+ # initialize to what's available right now
if self.reinitialize_at_every_step or len(self.host_charts_dict[self.host]) == 0:
self.charts_init()
self.custom_models_init()
diff --git a/collectors/python.d.plugin/anomalies/anomalies.conf b/collectors/python.d.plugin/anomalies/anomalies.conf
index 0dc40ef2c..ef867709a 100644
--- a/collectors/python.d.plugin/anomalies/anomalies.conf
+++ b/collectors/python.d.plugin/anomalies/anomalies.conf
@@ -31,8 +31,8 @@
# JOBS (data collection sources)
# Pull data from local Netdata node.
-local:
- name: 'local'
+anomalies:
+ name: 'Anomalies'
# Host to pull data from.
host: '127.0.0.1:19999'
diff --git a/collectors/python.d.plugin/changefinder/README.md b/collectors/python.d.plugin/changefinder/README.md
index e1c1d4ba4..051639d1e 100644
--- a/collectors/python.d.plugin/changefinder/README.md
+++ b/collectors/python.d.plugin/changefinder/README.md
@@ -12,8 +12,8 @@ on your Netdata charts and/or dimensions.
Instead of this collector just _collecting_ data, it also does some computation on the data it collects to return a
changepoint score for each chart or dimension you configure it to work on. This is
-an [online](https://en.wikipedia.org/wiki/Online_machine_learning) machine learning algorithim so there is no batch step
-to train the model, instead it evolves over time as more data arrives. That makes this particualr algorithim quite cheap
+an [online](https://en.wikipedia.org/wiki/Online_machine_learning) machine learning algorithm so there is no batch step
+to train the model, instead it evolves over time as more data arrives. That makes this particular algorithm quite cheap
to compute at each step of data collection (see the notes section below for more details) and it should scale fairly
well to work on lots of charts or hosts (if running on a parent node for example).
@@ -28,7 +28,7 @@ Two charts are available:
This chart shows the percentile of the score that is output from the ChangeFinder library (it is turned off by default
but available with `show_scores: true`).
-A high observed score is more likley to be a valid changepoint worth exploring, even more so when multiple charts or
+A high observed score is more likely to be a valid changepoint worth exploring, even more so when multiple charts or
dimensions have high changepoint scores at the same time or very close together.
### ChangeFinder Flags (`changefinder.flags`)
@@ -36,11 +36,11 @@ dimensions have high changepoint scores at the same time or very close together.
This chart shows `1` or `0` if the latest score has a percentile value that exceeds the `cf_threshold` threshold. By
default, any scores that are in the 99th or above percentile will raise a flag on this chart.
-The raw changefinder score itself can be a little noisey and so limiting ourselves to just periods where it surpasses
+The raw changefinder score itself can be a little noisy and so limiting ourselves to just periods where it surpasses
the 99th percentile can help manage the "[signal to noise ratio](https://en.wikipedia.org/wiki/Signal-to-noise_ratio)"
better.
-The `cf_threshold` paramater might be one you want to play around with to tune things specifically for the workloads on
+The `cf_threshold` parameter might be one you want to play around with to tune things specifically for the workloads on
your node and the specific charts you want to monitor. For example, maybe the 95th percentile might work better for you
than the 99th percentile.
@@ -164,7 +164,7 @@ sudo su -s /bin/bash netdata
- It may take an hour or two (depending on your choice of `n_score_samples`) for the collector to 'settle' into its
 typical behaviour in terms of the trained models and scores you will see in the normal running of your node. Mainly
 this is because it can take a while to build up a proper distribution of previous scores in order to convert the raw
- score returned by the ChangeFinder algorithim into a percentile based on the most recent `n_score_samples` that have
+ score returned by the ChangeFinder algorithm into a percentile based on the most recent `n_score_samples` that have
already been produced. So when you first turn the collector on, it will have a lot of flags in the beginning and then
should 'settle down' once it has built up enough history. This is a typical characteristic of online machine learning
approaches which need some initial window of time before they can be useful.
diff --git a/collectors/python.d.plugin/elasticsearch/elasticsearch.chart.py b/collectors/python.d.plugin/elasticsearch/elasticsearch.chart.py
index dddf50b4c..93614b08c 100644
--- a/collectors/python.d.plugin/elasticsearch/elasticsearch.chart.py
+++ b/collectors/python.d.plugin/elasticsearch/elasticsearch.chart.py
@@ -506,7 +506,9 @@ CHARTS = {
def convert_index_store_size_to_bytes(size):
- # can be b, kb, mb, gb
+ # can be b, kb, mb, gb or None
+ if size is None:
+ return -1
if size.endswith('kb'):
return round(float(size[:-2]) * 1024)
elif size.endswith('mb'):
@@ -520,6 +522,12 @@ def convert_index_store_size_to_bytes(size):
return -1
+def convert_index_null_value(value):
+ if value is None:
+ return -1
+ return value
+
+
def convert_index_health(health):
if health == 'green':
return 0
@@ -634,6 +642,30 @@ class Service(UrlService):
# "docs.count": "10",
# "docs.deleted": "3",
# "store.size": "650b"
+ # },
+ # {
+ # "status":"open",
+ # "index":".kibana_3",
+ # "health":"red",
+ # "uuid":"umAdNrq6QaOXrmZjAowTNw",
+ # "store.size":null,
+ # "pri.store.size":null,
+ # "docs.count":null,
+ # "rep":"0",
+ # "pri":"1",
+ # "docs.deleted":null
+ # },
+ # {
+ # "health" : "green",
+ # "status" : "close",
+ # "index" : "siem-events-2021.09.12",
+ # "uuid" : "mTQ-Yl5TS7S3lGoRORE-Pg",
+ # "pri" : "4",
+ # "rep" : "0",
+ # "docs.count" : null,
+ # "docs.deleted" : null,
+ # "store.size" : null,
+ # "pri.store.size" : null
# }
# ]
raw_data = self._get_raw_data(url)
@@ -654,10 +686,12 @@ class Service(UrlService):
continue
v = {
- '{0}_index_docs_count'.format(name): idx['docs.count'],
'{0}_index_replica'.format(name): idx['rep'],
'{0}_index_health'.format(name): convert_index_health(idx['health']),
}
+ docs_count = convert_index_null_value(idx['docs.count'])
+ if docs_count != -1:
+ v['{0}_index_docs_count'.format(name)] = idx['docs.count']
size = convert_index_store_size_to_bytes(idx['store.size'])
if size != -1:
v['{0}_index_store_size'.format(name)] = size
diff --git a/collectors/python.d.plugin/go_expvar/go_expvar.chart.py b/collectors/python.d.plugin/go_expvar/go_expvar.chart.py
index f9bbdc164..dca010817 100644
--- a/collectors/python.d.plugin/go_expvar/go_expvar.chart.py
+++ b/collectors/python.d.plugin/go_expvar/go_expvar.chart.py
@@ -237,7 +237,7 @@ class Service(UrlService):
gc_pauses = memstats['PauseNs']
try:
gc_pause_avg = sum(gc_pauses) / len([x for x in gc_pauses if x > 0])
- # no GC cycles have occured yet
+ # no GC cycles have occurred yet
except ZeroDivisionError:
gc_pause_avg = 0
diff --git a/collectors/python.d.plugin/httpcheck/README.md b/collectors/python.d.plugin/httpcheck/README.md
index 55aad52f0..59c60f544 100644
--- a/collectors/python.d.plugin/httpcheck/README.md
+++ b/collectors/python.d.plugin/httpcheck/README.md
@@ -25,7 +25,7 @@ Following charts are drawn per job:
## Configuration
-Edit the `python.d/httpcheck.conf` configuration file using `edit-config` from the Netdata [config
+Edit the [`python.d/httpcheck.conf`](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/httpcheck/httpcheck.conf) configuration file using `edit-config` from the Netdata [config
directory](/docs/configure/nodes.md), which is typically at `/etc/netdata`.
```bash
diff --git a/collectors/python.d.plugin/mongodb/mongodb.chart.py b/collectors/python.d.plugin/mongodb/mongodb.chart.py
index 2e6fb220a..bec94d3ef 100644
--- a/collectors/python.d.plugin/mongodb/mongodb.chart.py
+++ b/collectors/python.d.plugin/mongodb/mongodb.chart.py
@@ -250,10 +250,10 @@ CHARTS = {
]
},
'cursors': {
- 'options': [None, 'Currently openned cursors, cursors with timeout disabled and timed out cursors',
+ 'options': [None, 'Currently opened cursors, cursors with timeout disabled and timed out cursors',
'cursors', 'database performance', 'mongodb.cursors', 'stacked'],
'lines': [
- ['cursor_total', 'openned', 'absolute', 1, 1],
+ ['cursor_total', 'opened', 'absolute', 1, 1],
['noTimeout', None, 'absolute', 1, 1],
['timedOut', None, 'incremental', 1, 1]
]
diff --git a/collectors/python.d.plugin/mysql/README.md b/collectors/python.d.plugin/mysql/README.md
index d8d3c1d0b..63d2c1e53 100644
--- a/collectors/python.d.plugin/mysql/README.md
+++ b/collectors/python.d.plugin/mysql/README.md
@@ -17,7 +17,7 @@ To create the `netdata` user, execute the following in the MySQL shell:
```sh
create user 'netdata'@'localhost';
-grant usage on *.* to 'netdata'@'localhost';
+grant usage, replication client on *.* to 'netdata'@'localhost';
flush privileges;
```
The `netdata` user will have the ability to connect to the MySQL server on `localhost` without a password.
diff --git a/collectors/python.d.plugin/mysql/mysql.chart.py b/collectors/python.d.plugin/mysql/mysql.chart.py
index 1737e16b4..e8c03cb00 100644
--- a/collectors/python.d.plugin/mysql/mysql.chart.py
+++ b/collectors/python.d.plugin/mysql/mysql.chart.py
@@ -398,7 +398,7 @@ CHARTS = {
]
},
'innodb_os_log_fsync_writes': {
- 'options': [None, 'InnoDB OS Log Operations', 'operations/s', 'innodb', 'mysql.innodb_os_log', 'line'],
+ 'options': [None, 'InnoDB OS Log Operations', 'operations/s', 'innodb', 'mysql.innodb_os_log_fsyncs', 'line'],
'lines': [
['Innodb_os_log_fsyncs', 'fsyncs', 'incremental'],
]
@@ -445,7 +445,7 @@ CHARTS = {
},
'innodb_buffer_pool_flush_pages_requests': {
'options': [None, 'InnoDB Buffer Pool Flush Pages Requests', 'requests/s', 'innodb',
- 'mysql.innodb_buffer_pool_pages', 'line'],
+ 'mysql.innodb_buffer_pool_pages_flushed', 'line'],
'lines': [
['Innodb_buffer_pool_pages_flushed', 'flush pages', 'incremental'],
]
diff --git a/collectors/python.d.plugin/postgres/README.md b/collectors/python.d.plugin/postgres/README.md
index dc9b18467..0515ec57c 100644
--- a/collectors/python.d.plugin/postgres/README.md
+++ b/collectors/python.d.plugin/postgres/README.md
@@ -12,6 +12,8 @@ Collects database health and performance metrics.
- `python-psycopg2` package. You have to install it manually and make sure that it is available to the `netdata` user, either using `pip`, the package manager of your Linux distribution, or any other method you prefer.
+- PostgreSQL v9.4+
+
Following charts are drawn:
1. **Database size** MB
@@ -68,6 +70,23 @@ Following charts are drawn:
- locks
+12. **Standby delta** KB
+
+ - sent delta
+ - write delta
+ - flush delta
+ - replay delta
+
+13. **Standby lag** seconds
+
+ - write lag
+ - flush lag
+ - replay lag
+
+14. **Average number of blocking transactions in db** processes
+
+ - blocking
+
## Configuration
Edit the `python.d/postgres.conf` configuration file using `edit-config` from the Netdata [config
diff --git a/collectors/python.d.plugin/postgres/postgres.chart.py b/collectors/python.d.plugin/postgres/postgres.chart.py
index bd28dd9b7..29026a6a3 100644
--- a/collectors/python.d.plugin/postgres/postgres.chart.py
+++ b/collectors/python.d.plugin/postgres/postgres.chart.py
@@ -45,14 +45,18 @@ QUERY_NAME_INDEX_STATS = 'INDEX_STATS'
QUERY_NAME_DATABASE = 'DATABASE'
QUERY_NAME_BGWRITER = 'BGWRITER'
QUERY_NAME_LOCKS = 'LOCKS'
+QUERY_NAME_BLOCKERS = 'BLOCKERS'
QUERY_NAME_DATABASES = 'DATABASES'
QUERY_NAME_STANDBY = 'STANDBY'
QUERY_NAME_REPLICATION_SLOT = 'REPLICATION_SLOT'
QUERY_NAME_STANDBY_DELTA = 'STANDBY_DELTA'
+QUERY_NAME_STANDBY_LAG = 'STANDBY_LAG'
QUERY_NAME_REPSLOT_FILES = 'REPSLOT_FILES'
QUERY_NAME_IF_SUPERUSER = 'IF_SUPERUSER'
QUERY_NAME_SERVER_VERSION = 'SERVER_VERSION'
QUERY_NAME_AUTOVACUUM = 'AUTOVACUUM'
+QUERY_NAME_FORCED_AUTOVACUUM = 'FORCED_AUTOVACUUM'
+QUERY_NAME_TX_WRAPAROUND = 'TX_WRAPAROUND'
QUERY_NAME_DIFF_LSN = 'DIFF_LSN'
QUERY_NAME_WAL_WRITES = 'WAL_WRITES'
@@ -123,6 +127,9 @@ METRICS = {
'ShareLock',
'RowExclusiveLock'
],
+ QUERY_NAME_BLOCKERS: [
+ 'blocking_pids_avg'
+ ],
QUERY_NAME_AUTOVACUUM: [
'analyze',
'vacuum_analyze',
@@ -130,12 +137,24 @@ METRICS = {
'vacuum_freeze',
'brin_summarize'
],
+ # Keep in sync with the column alias returned by QUERY_FORCED_AUTOVACUUM and
+ # with the dimension id used by the 'forced_autovacuum' chart.
+ QUERY_NAME_FORCED_AUTOVACUUM: [
+ 'percent_towards_forced_autovacuum'
+ ],
+ QUERY_NAME_TX_WRAPAROUND: [
+ 'oldest_current_xid',
+ 'percent_towards_wraparound'
+ ],
QUERY_NAME_STANDBY_DELTA: [
'sent_delta',
'write_delta',
'flush_delta',
'replay_delta'
],
+ QUERY_NAME_STANDBY_LAG: [
+ 'write_lag',
+ 'flush_lag',
+ 'replay_lag'
+ ],
QUERY_NAME_REPSLOT_FILES: [
'replslot_wal_keep',
'replslot_files'
@@ -177,7 +196,7 @@ FROM
FROM pg_catalog.pg_ls_dir('pg_wal') AS wal(name)
WHERE name ~ '^[0-9A-F]{24}$'
ORDER BY
- (pg_stat_file('pg_wal/'||name)).modification,
+ (pg_stat_file('pg_wal/'||name, true)).modification,
wal.name DESC) sub;
""",
V96: """
@@ -204,7 +223,7 @@ FROM
FROM pg_catalog.pg_ls_dir('pg_xlog') AS wal(name)
WHERE name ~ '^[0-9A-F]{24}$'
ORDER BY
- (pg_stat_file('pg_xlog/'||name)).modification,
+ (pg_stat_file('pg_xlog/'||name, true)).modification,
wal.name DESC) sub;
""",
}
@@ -263,7 +282,7 @@ FROM (
FROM pg_catalog.pg_stat_activity
WHERE backend_type IN ('client backend', 'background worker')
UNION ALL
- SELECT 'r', COUNT(1)
+ SELECT 'r', COUNT(1)
FROM pg_catalog.pg_stat_replication
) as s;
""",
@@ -277,7 +296,7 @@ FROM (
FROM pg_catalog.pg_stat_activity
WHERE query NOT LIKE 'autovacuum: %%'
UNION ALL
- SELECT 'r', COUNT(1)
+ SELECT 'r', COUNT(1)
FROM pg_catalog.pg_stat_replication
) as s;
""",
@@ -291,7 +310,7 @@ FROM (
FROM pg_catalog.pg_stat_activity
WHERE current_query NOT LIKE 'autovacuum: %%'
UNION ALL
- SELECT 'r', COUNT(1)
+ SELECT 'r', COUNT(1)
FROM pg_catalog.pg_stat_replication
) as s;
""",
@@ -386,6 +405,48 @@ ORDER BY datname, mode;
""",
}
+QUERY_BLOCKERS = {
+ DEFAULT: """
+WITH B AS (
+SELECT DISTINCT
+ pg_database.datname as database_name,
+ pg_locks.pid,
+ cardinality(pg_blocking_pids(pg_locks.pid)) AS blocking_pids
+FROM pg_locks
+INNER JOIN pg_database ON pg_database.oid = pg_locks.database
+WHERE NOT pg_locks.granted)
+SELECT database_name, AVG(blocking_pids) AS blocking_pids_avg
+FROM B
+GROUP BY database_name
+""",
+ V96: """
+WITH B AS (
+SELECT DISTINCT
+ pg_database.datname as database_name,
+ blocked_locks.pid AS blocked_pid,
+ COUNT(blocking_locks.pid) AS blocking_pids
+FROM pg_catalog.pg_locks blocked_locks
+INNER JOIN pg_database ON pg_database.oid = blocked_locks.database
+JOIN pg_catalog.pg_locks blocking_locks
+ ON blocking_locks.locktype = blocked_locks.locktype
+ AND blocking_locks.database IS NOT DISTINCT FROM blocked_locks.database
+ AND blocking_locks.relation IS NOT DISTINCT FROM blocked_locks.relation
+ AND blocking_locks.page IS NOT DISTINCT FROM blocked_locks.page
+ AND blocking_locks.tuple IS NOT DISTINCT FROM blocked_locks.tuple
+ AND blocking_locks.virtualxid IS NOT DISTINCT FROM blocked_locks.virtualxid
+ AND blocking_locks.transactionid IS NOT DISTINCT FROM blocked_locks.transactionid
+ AND blocking_locks.classid IS NOT DISTINCT FROM blocked_locks.classid
+ AND blocking_locks.objid IS NOT DISTINCT FROM blocked_locks.objid
+ AND blocking_locks.objsubid IS NOT DISTINCT FROM blocked_locks.objsubid
+ AND blocking_locks.pid != blocked_locks.pid
+WHERE NOT blocked_locks.GRANTED
+GROUP BY database_name, blocked_pid)
+SELECT database_name, AVG(blocking_pids) AS blocking_pids_avg
+FROM B
+GROUP BY database_name
+"""
+}
+
QUERY_DATABASES = {
DEFAULT: """
SELECT
@@ -394,17 +455,18 @@ FROM pg_stat_database
WHERE
has_database_privilege(
(SELECT current_user), datname, 'connect')
- AND NOT datname ~* '^template\d';
+ AND NOT datname ~* '^template\d'
+ORDER BY datname;
""",
}
QUERY_STANDBY = {
DEFAULT: """
SELECT
- application_name
-FROM pg_stat_replication
-WHERE application_name IS NOT NULL
-GROUP BY application_name;
+ COALESCE(prs.slot_name, psr.application_name) application_name
+FROM pg_stat_replication psr
+LEFT OUTER JOIN pg_replication_slots prs on psr.pid = prs.active_pid
+WHERE application_name IS NOT NULL;
""",
}
@@ -418,7 +480,7 @@ FROM pg_replication_slots;
QUERY_STANDBY_DELTA = {
DEFAULT: """
SELECT
- application_name,
+ COALESCE(prs.slot_name, psr.application_name) application_name,
pg_wal_lsn_diff(
CASE pg_is_in_recovery()
WHEN true THEN pg_last_wal_receive_lsn()
@@ -443,12 +505,13 @@ SELECT
ELSE pg_current_wal_lsn()
END,
replay_lsn) AS replay_delta
-FROM pg_stat_replication
+FROM pg_stat_replication psr
+LEFT OUTER JOIN pg_replication_slots prs on psr.pid = prs.active_pid
WHERE application_name IS NOT NULL;
""",
V96: """
SELECT
- application_name,
+ COALESCE(prs.slot_name, psr.application_name) application_name,
pg_xlog_location_diff(
CASE pg_is_in_recovery()
WHEN true THEN pg_last_xlog_receive_location()
@@ -473,11 +536,25 @@ SELECT
ELSE pg_current_xlog_location()
END,
replay_location) AS replay_delta
-FROM pg_stat_replication
+FROM pg_stat_replication psr
+LEFT OUTER JOIN pg_replication_slots prs on psr.pid = prs.active_pid
WHERE application_name IS NOT NULL;
""",
}
+QUERY_STANDBY_LAG = {
+ DEFAULT: """
+SELECT
+ COALESCE(prs.slot_name, psr.application_name) application_name,
+ COALESCE(EXTRACT(EPOCH FROM write_lag)::bigint, 0) AS write_lag,
+ COALESCE(EXTRACT(EPOCH FROM flush_lag)::bigint, 0) AS flush_lag,
+ COALESCE(EXTRACT(EPOCH FROM replay_lag)::bigint, 0) AS replay_lag
+FROM pg_stat_replication psr
+LEFT OUTER JOIN pg_replication_slots prs on psr.pid = prs.active_pid
+WHERE application_name IS NOT NULL;
+"""
+}
+
QUERY_REPSLOT_FILES = {
DEFAULT: """
WITH wal_size AS (
@@ -500,8 +577,20 @@ FROM
slot_type,
COALESCE (
floor(
- (pg_wal_lsn_diff(pg_current_wal_lsn (),slot.restart_lsn)
- - (pg_walfile_name_offset (restart_lsn)).file_offset) / (s.val)
+ CASE WHEN pg_is_in_recovery()
+ THEN (
+ pg_wal_lsn_diff(pg_last_wal_receive_lsn(), slot.restart_lsn)
+ -- this is needed to account for whole WAL retention and
+ -- not only size retention
+ + (pg_wal_lsn_diff(restart_lsn, '0/0') %% s.val)
+ ) / s.val
+ ELSE (
+ pg_wal_lsn_diff(pg_current_wal_lsn(), slot.restart_lsn)
+ -- this is needed to account for whole WAL retention and
+ -- not only size retention
+ + (pg_walfile_name_offset(restart_lsn)).file_offset
+ ) / s.val
+ END
),0) AS replslot_wal_keep
FROM pg_replication_slots slot
LEFT JOIN (
@@ -539,8 +628,20 @@ FROM
slot_type,
COALESCE (
floor(
- (pg_wal_lsn_diff(pg_current_wal_lsn (),slot.restart_lsn)
- - (pg_walfile_name_offset (restart_lsn)).file_offset) / (s.val)
+ CASE WHEN pg_is_in_recovery()
+ THEN (
+ pg_wal_lsn_diff(pg_last_wal_receive_lsn(), slot.restart_lsn)
+ -- this is needed to account for whole WAL retention and
+ -- not only size retention
+ + (pg_wal_lsn_diff(restart_lsn, '0/0') %% s.val)
+ ) / s.val
+ ELSE (
+ pg_wal_lsn_diff(pg_current_wal_lsn(), slot.restart_lsn)
+ -- this is needed to account for whole WAL retention and
+ -- not only size retention
+ + (pg_walfile_name_offset(restart_lsn)).file_offset
+ ) / s.val
+ END
),0) AS replslot_wal_keep
FROM pg_replication_slots slot
LEFT JOIN (
@@ -586,6 +687,43 @@ WHERE query NOT LIKE '%%pg_stat_activity%%';
""",
}
+QUERY_FORCED_AUTOVACUUM = {
+ DEFAULT: """
+WITH max_age AS (
+ SELECT setting AS autovacuum_freeze_max_age
+ FROM pg_catalog.pg_settings
+ WHERE name = 'autovacuum_freeze_max_age' )
+, per_database_stats AS (
+ SELECT datname
+ , m.autovacuum_freeze_max_age::int
+ , age(d.datfrozenxid) AS oldest_current_xid
+ FROM pg_catalog.pg_database d
+ JOIN max_age m ON (true)
+ WHERE d.datallowconn )
+SELECT max(ROUND(100*(oldest_current_xid/autovacuum_freeze_max_age::float))) AS percent_towards_forced_autovacuum
+FROM per_database_stats;
+""",
+}
+
+QUERY_TX_WRAPAROUND = {
+ DEFAULT: """
+WITH max_age AS (
+ SELECT 2000000000 as max_old_xid
+ FROM pg_catalog.pg_settings
+ WHERE name = 'autovacuum_freeze_max_age' )
+, per_database_stats AS (
+ SELECT datname
+ , m.max_old_xid::int
+ , age(d.datfrozenxid) AS oldest_current_xid
+ FROM pg_catalog.pg_database d
+ JOIN max_age m ON (true)
+ WHERE d.datallowconn )
+SELECT max(oldest_current_xid) AS oldest_current_xid
+ , max(ROUND(100*(oldest_current_xid/max_old_xid::float))) AS percent_towards_wraparound
+FROM per_database_stats;
+""",
+}
+
QUERY_DIFF_LSN = {
DEFAULT: """
SELECT
@@ -632,6 +770,10 @@ def query_factory(name, version=NO_VERSION):
return QUERY_BGWRITER[DEFAULT]
elif name == QUERY_NAME_LOCKS:
return QUERY_LOCKS[DEFAULT]
+ elif name == QUERY_NAME_BLOCKERS:
+ if version < 90600:
+ return QUERY_BLOCKERS[V96]
+ return QUERY_BLOCKERS[DEFAULT]
elif name == QUERY_NAME_DATABASES:
return QUERY_DATABASES[DEFAULT]
elif name == QUERY_NAME_STANDBY:
@@ -644,6 +786,10 @@ def query_factory(name, version=NO_VERSION):
return QUERY_SHOW_VERSION[DEFAULT]
elif name == QUERY_NAME_AUTOVACUUM:
return QUERY_AUTOVACUUM[DEFAULT]
+ elif name == QUERY_NAME_FORCED_AUTOVACUUM:
+ return QUERY_FORCED_AUTOVACUUM[DEFAULT]
+ elif name == QUERY_NAME_TX_WRAPAROUND:
+ return QUERY_TX_WRAPAROUND[DEFAULT]
elif name == QUERY_NAME_WAL:
if version < 100000:
return QUERY_WAL[V96]
@@ -656,6 +802,8 @@ def query_factory(name, version=NO_VERSION):
if version < 100000:
return QUERY_STANDBY_DELTA[V96]
return QUERY_STANDBY_DELTA[DEFAULT]
+ elif name == QUERY_NAME_STANDBY_LAG:
+ return QUERY_STANDBY_LAG[DEFAULT]
elif name == QUERY_NAME_REPSLOT_FILES:
if version < 110000:
return QUERY_REPSLOT_FILES[V10]
@@ -676,6 +824,7 @@ ORDER = [
'db_stat_tuple_write',
'db_stat_transactions',
'db_stat_connections',
+ 'db_stat_blocking_pids_avg',
'database_size',
'backend_process',
'backend_usage',
@@ -695,7 +844,11 @@ ORDER = [
'stat_bgwriter_maxwritten',
'replication_slot',
'standby_delta',
- 'autovacuum'
+ 'standby_lag',
+ 'autovacuum',
+ 'forced_autovacuum',
+ 'tx_wraparound_oldest_current_xid',
+ 'tx_wraparound_percent_towards_wraparound'
]
CHARTS = {
@@ -752,6 +905,13 @@ CHARTS = {
['temp_files', 'files', 'incremental']
]
},
+ 'db_stat_blocking_pids_avg': {
+ 'options': [None, 'Average number of blocking transactions in db', 'processes', 'db statistics',
+ 'postgres.db_stat_blocking_pids_avg', 'line'],
+ 'lines': [
+ ['blocking_pids_avg', 'blocking', 'absolute']
+ ]
+ },
'database_size': {
'options': [None, 'Database size', 'MiB', 'database size', 'postgres.db_size', 'stacked'],
'lines': [
@@ -875,6 +1035,24 @@ CHARTS = {
['brin_summarize', 'brin summarize', 'absolute']
]
},
+ 'forced_autovacuum': {
+ 'options': [None, 'Percent towards forced autovacuum', 'percent', 'autovacuum', 'postgres.forced_autovacuum', 'line'],
+ 'lines': [
+ ['percent_towards_forced_autovacuum', 'percent', 'absolute']
+ ]
+ },
+ 'tx_wraparound_oldest_current_xid': {
+ 'options': [None, 'Oldest current XID', 'xid', 'tx_wraparound', 'postgres.tx_wraparound_oldest_current_xid', 'line'],
+ 'lines': [
+ ['oldest_current_xid', 'xid', 'absolute']
+ ]
+ },
+ 'tx_wraparound_percent_towards_wraparound': {
+ 'options': [None, 'Percent towards wraparound', 'percent', 'tx_wraparound', 'postgres.percent_towards_wraparound', 'line'],
+ 'lines': [
+ ['percent_towards_wraparound', 'percent', 'absolute']
+ ]
+ },
'standby_delta': {
'options': [None, 'Standby delta', 'KiB', 'replication delta', 'postgres.standby_delta', 'line'],
'lines': [
@@ -884,6 +1062,14 @@ CHARTS = {
['replay_delta', 'replay delta', 'absolute', 1, 1024]
]
},
+ 'standby_lag': {
+ 'options': [None, 'Standby lag', 'seconds', 'replication lag', 'postgres.standby_lag', 'line'],
+ 'lines': [
+ ['write_lag', 'write lag', 'absolute'],
+ ['flush_lag', 'flush lag', 'absolute'],
+ ['replay_lag', 'replay lag', 'absolute']
+ ]
+ },
'replication_slot': {
'options': [None, 'Replication slot files', 'files', 'replication slot', 'postgres.replication_slot', 'line'],
'lines': [
@@ -1073,6 +1259,7 @@ class Service(SimpleService):
self.queries[query_factory(QUERY_NAME_BGWRITER)] = METRICS[QUERY_NAME_BGWRITER]
self.queries[query_factory(QUERY_NAME_DIFF_LSN, self.server_version)] = METRICS[QUERY_NAME_WAL_WRITES]
self.queries[query_factory(QUERY_NAME_STANDBY_DELTA, self.server_version)] = METRICS[QUERY_NAME_STANDBY_DELTA]
+ self.queries[query_factory(QUERY_NAME_BLOCKERS, self.server_version)] = METRICS[QUERY_NAME_BLOCKERS]
if self.do_index_stats:
self.queries[query_factory(QUERY_NAME_INDEX_STATS)] = METRICS[QUERY_NAME_INDEX_STATS]
@@ -1092,6 +1279,12 @@ class Service(SimpleService):
if self.server_version >= 90400:
self.queries[query_factory(QUERY_NAME_AUTOVACUUM)] = METRICS[QUERY_NAME_AUTOVACUUM]
+ self.queries[query_factory(QUERY_NAME_FORCED_AUTOVACUUM)] = METRICS[QUERY_NAME_FORCED_AUTOVACUUM]
+ self.queries[query_factory(QUERY_NAME_TX_WRAPAROUND)] = METRICS[QUERY_NAME_TX_WRAPAROUND]
+
+ if self.server_version >= 100000:
+ self.queries[query_factory(QUERY_NAME_STANDBY_LAG)] = METRICS[QUERY_NAME_STANDBY_LAG]
+
def create_dynamic_charts(self):
for database_name in self.databases[::-1]:
dim = [
@@ -1116,11 +1309,19 @@ class Service(SimpleService):
)
for application_name in self.secondaries[::-1]:
- add_replication_delta_chart(
+ add_replication_standby_chart(
order=self.order,
definitions=self.definitions,
name='standby_delta',
application_name=application_name,
+ chart_family='replication delta',
+ )
+ add_replication_standby_chart(
+ order=self.order,
+ definitions=self.definitions,
+ name='standby_lag',
+ application_name=application_name,
+ chart_family='replication lag',
)
for slot_name in self.replication_slots[::-1]:
@@ -1199,7 +1400,7 @@ def add_database_stat_chart(order, definitions, name, database_name):
'lines': create_lines(database_name, chart_template['lines'])}
-def add_replication_delta_chart(order, definitions, name, application_name):
+def add_replication_standby_chart(order, definitions, name, application_name, chart_family):
def create_lines(standby, lines):
result = list()
for line in lines:
@@ -1213,7 +1414,7 @@ def add_replication_delta_chart(order, definitions, name, application_name):
order.insert(position, chart_name)
name, title, units, _, context, chart_type = chart_template['options']
definitions[chart_name] = {
- 'options': [name, title + ': ' + application_name, units, 'replication delta', context, chart_type],
+ 'options': [name, title + ': ' + application_name, units, chart_family, context, chart_type],
'lines': create_lines(application_name, chart_template['lines'])}
diff --git a/collectors/python.d.plugin/postgres/postgres.conf b/collectors/python.d.plugin/postgres/postgres.conf
index 1970a7a27..7e354d99b 100644
--- a/collectors/python.d.plugin/postgres/postgres.conf
+++ b/collectors/python.d.plugin/postgres/postgres.conf
@@ -97,14 +97,7 @@
# the client (Netdata) is not considered local, unless it runs from inside
# the same container.
#
-# Postgres supported versions are :
-# - 9.3 (without autovacuum)
-# - 9.4
-# - 9.5
-# - 9.6
-# - 10
-#
-# Superuser access is needed for theses charts:
+# Superuser access is needed for these charts:
# Write-Ahead Logs
# Archive Write-Ahead Logs
#
diff --git a/collectors/python.d.plugin/python.d.plugin.in b/collectors/python.d.plugin/python.d.plugin.in
index 9d575d86f..b263f229e 100644
--- a/collectors/python.d.plugin/python.d.plugin.in
+++ b/collectors/python.d.plugin/python.d.plugin.in
@@ -500,27 +500,31 @@ class Plugin:
self.saver = None
self.runs = 0
- def load_config(self):
- paths = [
- DIRS.plugin_user_config,
- DIRS.plugin_stock_config,
- ]
- self.log.debug("looking for '{0}' in {1}".format(self.config_name, paths))
- abs_path = multi_path_find(self.config_name, *paths)
- if not abs_path:
- self.log.warning("'{0}' was not found, using defaults".format(self.config_name))
- return True
-
- self.log.debug("loading '{0}'".format(abs_path))
+ def load_config_file(self, filepath, expected):
+ self.log.debug("looking for '{0}'".format(filepath))
+ if not os.path.isfile(filepath):
+ log = self.log.info if not expected else self.log.error
+ log("'{0}' was not found".format(filepath))
+ return dict()
try:
- config = load_config(abs_path)
+ config = load_config(filepath)
except Exception as error:
- self.log.error("error on loading '{0}' : {1}".format(abs_path, repr(error)))
- return False
+ self.log.error("error on loading '{0}' : {1}".format(filepath, repr(error)))
+ return dict()
+ self.log.debug("'{0}' is loaded".format(filepath))
+ return config
- self.log.debug("'{0}' is loaded".format(abs_path))
- self.config.update(config)
- return True
+ def load_config(self):
+ user_config = self.load_config_file(
+ filepath=os.path.join(DIRS.plugin_user_config, self.config_name),
+ expected=False,
+ )
+ stock_config = self.load_config_file(
+ filepath=os.path.join(DIRS.plugin_stock_config, self.config_name),
+ expected=True,
+ )
+ self.config.update(stock_config)
+ self.config.update(user_config)
def load_job_statuses(self):
self.log.debug("looking for '{0}' in {1}".format(self.jobs_status_dump_name, DIRS.var_lib))
@@ -593,8 +597,7 @@ class Plugin:
return jobs
def setup(self):
- if not self.load_config():
- return False
+ self.load_config()
if not self.config['enabled']:
self.log.info('disabled in the configuration file')
diff --git a/collectors/python.d.plugin/smartd_log/smartd_log.chart.py b/collectors/python.d.plugin/smartd_log/smartd_log.chart.py
index 402035f14..75b8c8c40 100644
--- a/collectors/python.d.plugin/smartd_log/smartd_log.chart.py
+++ b/collectors/python.d.plugin/smartd_log/smartd_log.chart.py
@@ -154,7 +154,7 @@ CHARTS = {
'algo': INCREMENTAL,
},
'write_total_err_corrected': {
- 'options': [None, 'Write Error Corrected', 'errors', 'errors', 'smartd_log.read_total_err_corrected', 'line'],
+ 'options': [None, 'Write Error Corrected', 'errors', 'errors', 'smartd_log.write_total_err_corrected', 'line'],
'lines': [],
'attrs': [ATTR_WRITE_ERR_COR],
'algo': INCREMENTAL,
diff --git a/collectors/python.d.plugin/varnish/varnish.chart.py b/collectors/python.d.plugin/varnish/varnish.chart.py
index 534d70926..506ad026a 100644
--- a/collectors/python.d.plugin/varnish/varnish.chart.py
+++ b/collectors/python.d.plugin/varnish/varnish.chart.py
@@ -197,7 +197,7 @@ class VarnishVersion:
class Parser:
_backend_new = re.compile(r'VBE.([\d\w_.]+)\(.*?\).(beresp[\w_]+)\s+(\d+)')
- _backend_old = re.compile(r'VBE\.[\d\w-]+\.([\w\d_]+).(beresp[\w_]+)\s+(\d+)')
+ _backend_old = re.compile(r'VBE\.[\d\w-]+\.([\w\d_-]+).(beresp[\w_]+)\s+(\d+)')
_default = re.compile(r'([A-Z]+\.)?([\d\w_.]+)\s+(\d+)')
def __init__(self):
diff --git a/collectors/python.d.plugin/zscores/README.md b/collectors/python.d.plugin/zscores/README.md
index 0b4472374..7fb189f6a 100644
--- a/collectors/python.d.plugin/zscores/README.md
+++ b/collectors/python.d.plugin/zscores/README.md
@@ -43,7 +43,7 @@ looking at first (for more background information on why 3 stddev
see [here](https://en.wikipedia.org/wiki/68%E2%80%9395%E2%80%9399.7_rule#:~:text=In%20the%20empirical%20sciences%20the,99.7%25%20probability%20as%20near%20certainty.))
.
-In the example below we basically took a sledge hammer to our system so its not suprising that lots of charts light up
+In the example below we basically took a sledge hammer to our system so it's not surprising that lots of charts light up
after we run the stress command. In a more realistic setting you might just see a handful of charts with strange zscores
and that could be a good indication of where to look first.
@@ -101,9 +101,9 @@ information about each one and what it does.
host: '127.0.0.1:19999'
# What charts to pull data for - A regex like 'system\..*|' or 'system\..*|apps.cpu|apps.mem' etc.
charts_regex: 'system\..*'
-# length of time to base calulcations off for mean and stddev
+# length of time to base calculations off for mean and stddev
train_secs: 14400 # use last 4 hours to work out the mean and stddev for the zscore
-# offset preceeding latest data to ignore when calculating mean and stddev
+# offset preceding latest data to ignore when calculating mean and stddev
offset_secs: 300 # ignore last 5 minutes of data when calculating the mean and stddev
# recalculate the mean and stddev every n steps of the collector
train_every_n: 900 # recalculate mean and stddev every 15 minutes
@@ -114,11 +114,11 @@ z_clip: 10 # cap each zscore at 10 so as to avoid really large individual zscore
# set z_abs: 'true' to make all zscores be absolute values only.
z_abs: 'true'
# burn in period in which to initially calculate mean and stddev on every step
-burn_in: 2 # on startup of the collector continually update the mean and stddev in case any gaps or inital calculations fail to return
+burn_in: 2 # on startup of the collector continually update the mean and stddev in case any gaps or initial calculations fail to return
# mode can be to get a zscore 'per_dim' or 'per_chart'
mode: 'per_chart' # 'per_chart' means individual dimension level smoothed zscores will be aggregated to one zscore per chart per time step
# per_chart_agg is how you aggregate from dimension to chart when mode='per_chart'
-per_chart_agg: 'mean' # 'absmax' will take the max absolute value accross all dimensions but will maintain the sign. 'mean' will just average.
+per_chart_agg: 'mean' # 'absmax' will take the max absolute value across all dimensions but will maintain the sign. 'mean' will just average.
```
## Notes
@@ -128,7 +128,7 @@ per_chart_agg: 'mean' # 'absmax' will take the max absolute value accross all di
calls to the netdata rest api to get the required data for each chart when calculating the mean and stddev.
- It may take a few hours or so for the collector to 'settle' into it's typical behaviour in terms of the scores you
will see in the normal running of your system.
-- The zscore you see for each chart when using `mode: 'per_chart'` as actually an aggregated zscore accross all the
+- The zscore you see for each chart when using `mode: 'per_chart'` is actually an aggregated zscore across all the
dimensions on the underlying chart.
- If you set `mode: 'per_dim'` then you will see a zscore for each dimension on each chart as opposed to one per chart.
- As this collector does some calculations itself in python you may want to try it out first on a test or development
diff --git a/collectors/python.d.plugin/zscores/zscores.chart.py b/collectors/python.d.plugin/zscores/zscores.chart.py
index 48397d8dd..1099b9376 100644
--- a/collectors/python.d.plugin/zscores/zscores.chart.py
+++ b/collectors/python.d.plugin/zscores/zscores.chart.py
@@ -24,11 +24,11 @@ ORDER = [
CHARTS = {
'z': {
- 'options': ['z', 'Z Score', 'z', 'Z Score', 'z', 'line'],
+ 'options': ['z', 'Z Score', 'z', 'Z Score', 'zscores.z', 'line'],
'lines': []
},
'3stddev': {
- 'options': ['3stddev', 'Z Score >3', 'count', '3 Stddev', '3stddev', 'stacked'],
+ 'options': ['3stddev', 'Z Score >3', 'count', '3 Stddev', 'zscores.3stddev', 'stacked'],
'lines': []
},
}
diff --git a/collectors/python.d.plugin/zscores/zscores.conf b/collectors/python.d.plugin/zscores/zscores.conf
index fab18c787..07d62ebe6 100644
--- a/collectors/python.d.plugin/zscores/zscores.conf
+++ b/collectors/python.d.plugin/zscores/zscores.conf
@@ -83,7 +83,7 @@ local:
# length of time to base calculations off for mean and stddev
train_secs: 14400 # use last 4 hours to work out the mean and stddev for the zscore
- # offset preceeding latest data to ignore when calculating mean and stddev
+ # offset preceding latest data to ignore when calculating mean and stddev
offset_secs: 300 # ignore last 5 minutes of data when calculating the mean and stddev
# recalculate the mean and stddev every n steps of the collector
@@ -99,10 +99,10 @@ local:
z_abs: 'true'
# burn in period in which to initially calculate mean and stddev on every step
- burn_in: 2 # on startup of the collector continually update the mean and stddev in case any gaps or inital calculations fail to return
+ burn_in: 2 # on startup of the collector continually update the mean and stddev in case any gaps or initial calculations fail to return
# mode can be to get a zscore 'per_dim' or 'per_chart'
mode: 'per_chart' # 'per_chart' means individual dimension level smoothed zscores will be aggregated to one zscore per chart per time step
# per_chart_agg is how you aggregate from dimension to chart when mode='per_chart'
- per_chart_agg: 'mean' # 'absmax' will take the max absolute value accross all dimensions but will maintain the sign. 'mean' will just average.
+ per_chart_agg: 'mean' # 'absmax' will take the max absolute value across all dimensions but will maintain the sign. 'mean' will just average.