summaryrefslogtreecommitdiffstats
path: root/health/health.d/vcsa.conf
blob: 8538e488cb90ca0503906425add8ccbfe1159582 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
# Make sure data from vcsa is still being collected (the appliance is running and responding).

 # Staleness check on the vcsa.system_health chart. $this is the value of
 # `calc`: the number of seconds between now and the chart's last collection
 # ($now - $last_collected_t). The check is re-evaluated every 10s.
 template: vcsa_last_collected_secs
       on: vcsa.system_health
    class: Virtual Machine
component: VMware vCenter
     type: Latency
     calc: $now - $last_collected_t
    units: seconds ago
    every: 10s
 # Hysteresis: while the alert is below WARNING the threshold is
 # 5 * $update_every; once it is already WARNING (or higher) the threshold
 # drops to $update_every, so the alert only clears when $this <= $update_every.
 # NOTE(review): $update_every is presumably the chart's collection interval
 # in seconds - confirm against the Netdata health variables reference.
     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
 # Same pattern for CRITICAL: raised above 60 * $update_every, and kept
 # while $this stays above $update_every.
     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
    delay: down 5m multiplier 1.5 max 1h
     info: number of seconds since the last successful data collection
       to: sysadmin

# Overall system health:
#  - 0: all components are healthy.
#  - 1: one or more components might become overloaded soon.
#  - 2: one or more components in the appliance might be degraded.
#  - 3: one or more components might be in an unusable status and the appliance might become unresponsive soon.
#  - 4: no health data is available.

 # Overall appliance health. $this is the maximum value of the `system`
 # dimension of vcsa.system_health over the last 10 seconds, re-evaluated
 # every 10s.
 template: vcsa_system_health
       on: vcsa.system_health
    class: Virtual Machine
component: VMware vCenter
     type: Errors
   lookup: max -10s unaligned of system
    units: status
    every: 10s
 # yellow (1) or orange (2) -> WARNING; red (3) -> CRITICAL.
 # unknown (-1), green (0) and grey (4) match neither expression.
     warn: ($this == 1) || ($this == 2)
     crit: $this == 3
    delay: down 1m multiplier 1.5 max 1h
     info: overall system health status \
           (-1: unknown, 0: green, 1: yellow, 2: orange, 3: red, 4: grey)
       to: sysadmin

# Components health:
#  - 0: healthy.
#  - 1: healthy, but may have some problems.
#  - 2: degraded, and may have serious problems.
#  - 3: unavailable, or will stop functioning soon.
#  - 4: no health data is available.

 # Swap component health. $this is the maximum value of the `swap` dimension
 # of vcsa.components_health over the last 10 seconds, re-evaluated every 10s.
 template: vcsa_swap_health
       on: vcsa.components_health
    class: Virtual Machine
component: VMware vCenter
     type: Errors
   lookup: max -10s unaligned of swap
    units: status
    every: 10s
 # yellow (1) -> WARNING; orange (2) or red (3) -> CRITICAL.
 # unknown (-1), green (0) and grey (4) match neither expression.
     warn: $this == 1
     crit: ($this == 2) || ($this == 3)
    delay: down 1m multiplier 1.5 max 1h
     info: swap health status \
           (-1: unknown, 0: green, 1: yellow, 2: orange, 3: red, 4: grey)
       to: sysadmin

 # Storage component health. $this is the maximum value of the `storage`
 # dimension of vcsa.components_health over the last 10 seconds, re-evaluated
 # every 10s.
 template: vcsa_storage_health
       on: vcsa.components_health
    class: Virtual Machine
component: VMware vCenter
     type: Errors
   lookup: max -10s unaligned of storage
    units: status
    every: 10s
 # yellow (1) -> WARNING; orange (2) or red (3) -> CRITICAL.
 # unknown (-1), green (0) and grey (4) match neither expression.
     warn: $this == 1
     crit: ($this == 2) || ($this == 3)
    delay: down 1m multiplier 1.5 max 1h
     info: storage health status \
           (-1: unknown, 0: green, 1: yellow, 2: orange, 3: red, 4: grey)
       to: sysadmin

 # Memory component health. $this is the maximum value of the `mem` dimension
 # of vcsa.components_health over the last 10 seconds, re-evaluated every 10s.
 template: vcsa_mem_health
       on: vcsa.components_health
    class: Virtual Machine
component: VMware vCenter
     type: Errors
   lookup: max -10s unaligned of mem
    units: status
    every: 10s
 # yellow (1) -> WARNING; orange (2) or red (3) -> CRITICAL.
 # unknown (-1), green (0) and grey (4) match neither expression.
     warn: $this == 1
     crit: ($this == 2) || ($this == 3)
    delay: down 1m multiplier 1.5 max 1h
     info: memory health status \
           (-1: unknown, 0: green, 1: yellow, 2: orange, 3: red, 4: grey)
       to: sysadmin

 # Load component health. $this is the maximum value of the `load` dimension
 # of vcsa.components_health over the last 10 seconds, re-evaluated every 10s.
 # NOTE(review): this stanza is typed `Utilization` while the other
 # vcsa.components_health stanzas in this file use `Errors` - presumably
 # intentional; confirm.
 template: vcsa_load_health
       on: vcsa.components_health
    class: Virtual Machine
component: VMware vCenter
     type: Utilization
   lookup: max -10s unaligned of load
    units: status
    every: 10s
 # yellow (1) -> WARNING; orange (2) or red (3) -> CRITICAL.
 # unknown (-1), green (0) and grey (4) match neither expression.
     warn: $this == 1
     crit: ($this == 2) || ($this == 3)
    delay: down 1m multiplier 1.5 max 1h
     info: load health status \
           (-1: unknown, 0: green, 1: yellow, 2: orange, 3: red, 4: grey)
       to: sysadmin

 # Database storage component health. $this is the maximum value of the
 # `database_storage` dimension of vcsa.components_health over the last
 # 10 seconds, re-evaluated every 10s.
 template: vcsa_database_storage_health
       on: vcsa.components_health
    class: Virtual Machine
component: VMware vCenter
     type: Errors
   lookup: max -10s unaligned of database_storage
    units: status
    every: 10s
 # yellow (1) -> WARNING; orange (2) or red (3) -> CRITICAL.
 # unknown (-1), green (0) and grey (4) match neither expression.
     warn: $this == 1
     crit: ($this == 2) || ($this == 3)
    delay: down 1m multiplier 1.5 max 1h
     info: database storage health status \
           (-1: unknown, 0: green, 1: yellow, 2: orange, 3: red, 4: grey)
       to: sysadmin

 # applmgmt component health. $this is the maximum value of the `applmgmt`
 # dimension of vcsa.components_health over the last 10 seconds, re-evaluated
 # every 10s.
 # NOTE(review): `applmgmt` presumably refers to the appliance management
 # service - confirm against the vcsa collector documentation.
 template: vcsa_applmgmt_health
       on: vcsa.components_health
    class: Virtual Machine
component: VMware vCenter
     type: Errors
   lookup: max -10s unaligned of applmgmt
    units: status
    every: 10s
 # yellow (1) -> WARNING; orange (2) or red (3) -> CRITICAL.
 # unknown (-1), green (0) and grey (4) match neither expression.
     warn: $this == 1
     crit: ($this == 2) || ($this == 3)
    delay: down 1m multiplier 1.5 max 1h
     info: applmgmt health status \
           (-1: unknown, 0: green, 1: yellow, 2: orange, 3: red, 4: grey)
       to: sysadmin


# Software updates health:
#  - 0: no updates available.
#  - 2: non-security updates are available.
#  - 3: security updates are available.
#  - 4: an error retrieving information on software updates.

 # Software updates status. $this is the maximum value of the
 # `software_packages` dimension of vcsa.software_updates_health over the
 # last 10 seconds, re-evaluated every 10s.
 template: vcsa_software_updates_health
       on: vcsa.software_updates_health
    class: Virtual Machine
component: VMware vCenter
     type: Errors
   lookup: max -10s unaligned of software_packages
    units: status
    every: 10s
 # grey (4, error retrieving update information) -> WARNING;
 # red (3, security updates available) -> CRITICAL.
 # orange (2, non-security updates available) matches neither expression,
 # so it raises no alert.
     warn: $this == 4
     crit: $this == 3
    delay: down 1m multiplier 1.5 max 1h
     info: software updates availability status \
           (-1: unknown, 0: green, 2: orange, 3: red, 4: grey)
       to: sysadmin