summaryrefslogtreecommitdiffstats
path: root/health/health.d/dbengine.conf
diff options
context:
space:
mode:
Diffstat (limited to 'health/health.d/dbengine.conf')
-rw-r--r--health/health.d/dbengine.conf10
1 files changed, 6 insertions, 4 deletions
diff --git a/health/health.d/dbengine.conf b/health/health.d/dbengine.conf
index 274673e3..3e51d37e 100644
--- a/health/health.d/dbengine.conf
+++ b/health/health.d/dbengine.conf
@@ -10,7 +10,7 @@ lookup: sum -10m unaligned of fs_errors
every: 10s
crit: $this > 0
delay: down 15m multiplier 1.5 max 1h
- info: number of File-System errors dbengine came across the last 10 minutes (too many open files, wrong permissions etc)
+ info: number of filesystem errors in the last 10 minutes (too many open files, wrong permissions, etc)
to: sysadmin
alarm: 10min_dbengine_global_io_errors
@@ -22,7 +22,7 @@ lookup: sum -10m unaligned of io_errors
every: 10s
crit: $this > 0
delay: down 1h multiplier 1.5 max 3h
- info: number of IO errors dbengine came across the last 10 minutes (CRC errors, out of space, bad disk etc)
+ info: number of IO errors in the last 10 minutes (CRC errors, out of space, bad disk, etc)
to: sysadmin
alarm: 10min_dbengine_global_flushing_warnings
@@ -34,7 +34,8 @@ lookup: sum -10m unaligned of pg_cache_over_half_dirty_events
every: 10s
warn: $this > 0
delay: down 1h multiplier 1.5 max 3h
- info: number of times in the last 10 minutes that dbengine dirty pages were over 50% of the instance's page cache, metric data at risk of not being stored in the database, please reduce disk load or use faster disks
+ info: number of times when dbengine dirty pages were over 50% of the instance's page cache in the last 10 minutes. \
+ Metric data are at risk of not being stored in the database. To remedy, reduce disk load or use faster disks.
to: sysadmin
alarm: 10min_dbengine_global_flushing_errors
@@ -46,5 +47,6 @@ lookup: sum -10m unaligned of flushing_pressure_deletions
every: 10s
crit: $this != 0
delay: down 1h multiplier 1.5 max 3h
- info: number of pages deleted due to failure to flush data to disk in the last 10 minutes, metric data were lost to unblock data collection, please reduce disk load or use faster disks
+ info: number of pages deleted due to failure to flush data to disk in the last 10 minutes. \
+ Metric data were lost to unblock data collection. To fix, reduce disk load or use faster disks.
to: sysadmin