summaryrefslogtreecommitdiffstats
path: root/src/health/health.d/zfs.conf
blob: 5c8065aa31eb2b634bbb48d3f1d4b83b8f6c1346 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
    # Alarm on chart zfs.memory_ops: sums the `throttled` dimension over the
    # last 10 minutes, re-evaluates every minute, and raises WARNING whenever
    # that sum is above zero. No crit level is defined for this alarm.
    alarm: zfs_memory_throttle
       on: zfs.memory_ops
    class: Utilization
     type: System
component: File system
   lookup: sum -10m unaligned absolute of throttled
    units: events
    every: 1m
     warn: $this > 0
    # NOTE(review): presumably holds back the recovery ("down") notification for
    # 1h, growing by 1.5x up to 2h — confirm against the Netdata `delay` line docs.
    delay: down 1h multiplier 1.5 max 2h
  summary: ZFS ARC growth throttling
     info: number of times ZFS had to limit the ARC growth in the last 10 minutes
    # NOTE(review): `silent` presumably means no notification is dispatched for
    # this alarm — confirm against the notification role configuration.
       to: silent

# ZFS pool state (alerts on the zfspool.state chart)

 # Template on zfspool.state: every 10 seconds, takes the value of the
 # `degraded` dimension and raises WARNING while it is above zero.
 # Only a warn level is defined here; the summary/info text is filled in with
 # the chart's `pool` label.
 template: zfs_pool_state_warn
       on: zfspool.state
    class: Errors
     type: System
component: File system
     calc: $degraded
    units: boolean
    every: 10s
     warn: $this > 0
    delay: down 1m multiplier 1.5 max 1h
  summary: ZFS pool ${label:pool} state
     info: ZFS pool ${label:pool} state is degraded
       to: sysadmin

 # Template on zfspool.state: every 10 seconds, adds the `faulted` and
 # `unavail` dimensions and raises CRITICAL while the sum is above zero.
 # Only a crit level is defined here; the summary/info text is filled in with
 # the chart's `pool` label.
 template: zfs_pool_state_crit
       on: zfspool.state
    class: Errors
     type: System
component: File system
     calc: $faulted + $unavail
    units: boolean
    every: 10s
     crit: $this > 0
    delay: down 1m multiplier 1.5 max 1h
  summary: Critical ZFS pool ${label:pool} state
     info: ZFS pool ${label:pool} state is faulted or unavail
       to: sysadmin


## go.d/zfspool (alerts on pool space utilization, pool health state and vdev health state charts)

 # Template on zfspool.pool_space_utilization: once a minute, takes the
 # `utilization` dimension (percent) and compares it with two thresholds that
 # use hysteresis keyed on the alert's current status:
 #   warn - raised above 90%; once WARNING or higher, kept until usage is <= 85%
 #   crit - raised above 98%; once CRITICAL, kept until usage is <= 90%
 # The crit expression must test `$status == $CRITICAL`. Testing
 # `$status >= $WARNING` there lowers the crit threshold to 90% as soon as the
 # warning fires, so the alert would escalate to CRITICAL on the very next
 # evaluation and the 98% threshold would never take effect.
 template: zfs_pool_space_utilization
       on: zfspool.pool_space_utilization
    class: Utilization
     type: System
component: File system
     calc: $utilization
    units: %
    every: 1m
     warn: $this > (($status >= $WARNING ) ? (85) : (90))
     crit: $this > (($status == $CRITICAL) ? (90) : (98))
    delay: down 1m multiplier 1.5 max 1h
  summary: ZFS pool ${label:pool} space utilization
     info: ZFS pool ${label:pool} is nearing capacity. Current space usage is above the threshold.
       to: sysadmin

 # Template on zfspool.pool_health_state: every 10 seconds, takes the value of
 # the `degraded` dimension and raises WARNING while it is above zero.
 # Uses the same expression and messages as zfs_pool_state_warn, but is
 # attached to the pool_health_state chart and reports `status` units.
 template: zfs_pool_health_state_warn
       on: zfspool.pool_health_state
    class: Errors
     type: System
component: File system
     calc: $degraded
    units: status
    every: 10s
     warn: $this > 0
    delay: down 1m multiplier 1.5 max 1h
  summary: ZFS pool ${label:pool} state
     info: ZFS pool ${label:pool} state is degraded
       to: sysadmin

 # Template on zfspool.pool_health_state: every 10 seconds, adds the `faulted`
 # and `unavail` dimensions and raises CRITICAL while the sum is above zero.
 # Uses the same expression and messages as zfs_pool_state_crit, but is
 # attached to the pool_health_state chart and reports `status` units.
 template: zfs_pool_health_state_crit
       on: zfspool.pool_health_state
    class: Errors
     type: System
component: File system
     calc: $faulted + $unavail
    units: status
    every: 10s
     crit: $this > 0
    delay: down 1m multiplier 1.5 max 1h
  summary: Critical ZFS pool ${label:pool} state
     info: ZFS pool ${label:pool} state is faulted or unavail
       to: sysadmin


 # Template on zfspool.vdev_health_state: every 10 seconds, adds the `degraded`
 # and `faulted` dimensions and raises WARNING while the sum is above zero.
 # Only a warn level is defined, so a faulted vdev is reported as WARNING here.
 # The summary uses both the `vdev` and `pool` chart labels; the info text uses
 # only `vdev`.
 template: zfs_vdev_health_state
       on: zfspool.vdev_health_state
    class: Errors
     type: System
component: File system
     calc: $degraded + $faulted
    units: status
    every: 10s
     warn: $this > 0
    delay: down 1m multiplier 1.5 max 1h
  summary: ZFS vdev ${label:vdev} pool ${label:pool} state
     info: ZFS vdev ${label:vdev} state is faulted or degraded
       to: sysadmin