summaryrefslogtreecommitdiffstats
path: root/health/health.d/dbengine.conf
blob: 274673e3e3b218cd7aed62af9fd21ab297734572 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# you can disable an alarm notification by setting the 'to' line to: silent

 # Critical alarm on dbengine file-system errors.
 # Sums the fs_errors dimension of the netdata.dbengine_global_errors chart
 # over the trailing 10 minutes, re-evaluated every 10s; any sum above zero
 # raises CRITICAL (this stanza has no warn line). Notifications go to the
 # sysadmin recipient.
 # NOTE(review): the delay line presumably holds back the recovery
 # notification (15m, growing x1.5 per repeat, capped at 1h) - confirm against
 # the Netdata health reference for the deployed version.
 alarm: 10min_dbengine_global_fs_errors
    on: netdata.dbengine_global_errors
    os: linux freebsd macos
 hosts: *
lookup: sum -10m unaligned of fs_errors
 units: errors
 every: 10s
  crit: $this > 0
 delay: down 15m multiplier 1.5 max 1h
  info: number of File-System errors dbengine came across in the last 10 minutes (too many open files, wrong permissions etc)
    to: sysadmin

 # Critical alarm on dbengine I/O errors.
 # Sums the io_errors dimension of the netdata.dbengine_global_errors chart
 # over the trailing 10 minutes, re-evaluated every 10s; any sum above zero
 # raises CRITICAL (this stanza has no warn line). Notifications go to the
 # sysadmin recipient.
 # NOTE(review): the delay line presumably holds back the recovery
 # notification (1h, growing x1.5 per repeat, capped at 3h) - confirm against
 # the Netdata health reference for the deployed version.
 alarm: 10min_dbengine_global_io_errors
    on: netdata.dbengine_global_errors
    os: linux freebsd macos
 hosts: *
lookup: sum -10m unaligned of io_errors
 units: errors
 every: 10s
  crit: $this > 0
 delay: down 1h multiplier 1.5 max 3h
  info: number of IO errors dbengine came across in the last 10 minutes (CRC errors, out of space, bad disk etc)
    to: sysadmin

 # Warning alarm on dbengine page-cache flushing pressure.
 # Sums the pg_cache_over_half_dirty_events dimension of the
 # netdata.dbengine_global_errors chart over the trailing 10 minutes,
 # re-evaluated every 10s; any sum above zero raises WARNING (this stanza has
 # no crit line). Notifications go to the sysadmin recipient.
 # NOTE(review): the delay line presumably holds back the recovery
 # notification (1h, growing x1.5 per repeat, capped at 3h) - confirm against
 # the Netdata health reference for the deployed version.
 alarm: 10min_dbengine_global_flushing_warnings
    on: netdata.dbengine_global_errors
    os: linux freebsd macos
 hosts: *
lookup: sum -10m unaligned of pg_cache_over_half_dirty_events
 units: errors
 every: 10s
  warn: $this > 0
 delay: down 1h multiplier 1.5 max 3h
  info: number of times in the last 10 minutes that dbengine dirty pages were over 50% of the instance's page cache, metric data at risk of not being stored in the database, please reduce disk load or use faster disks
    to: sysadmin

 # Critical alarm on pages dropped because they could not be flushed to disk.
 # Sums the flushing_pressure_deletions dimension of the
 # netdata.dbengine_long_term_page_stats chart over the trailing 10 minutes,
 # re-evaluated every 10s; any non-zero sum raises CRITICAL (this stanza has
 # no warn line). Notifications go to the sysadmin recipient.
 # NOTE(review): the crit test is "!= 0" whereas the other dbengine alarms in
 # this file use "> 0"; the two differ only for a negative sum - confirm
 # whether that difference is intended.
 # NOTE(review): the delay line presumably holds back the recovery
 # notification (1h, growing x1.5 per repeat, capped at 3h) - confirm against
 # the Netdata health reference for the deployed version.
 alarm: 10min_dbengine_global_flushing_errors
    on: netdata.dbengine_long_term_page_stats
    os: linux freebsd macos
 hosts: *
lookup: sum -10m unaligned of flushing_pressure_deletions
 units: pages
 every: 10s
  crit: $this != 0
 delay: down 1h multiplier 1.5 max 3h
  info: number of pages deleted due to failure to flush data to disk in the last 10 minutes, metric data were lost to unblock data collection, please reduce disk load or use faster disks
    to: sysadmin