# path: root/health/health.d/vcsa.conf
# blob: f4b03d4cfe4cf8ee8151142a288683795e0c3e6f

# make sure vcsa is running and responding

template: vcsa_last_collected_secs
      on: vcsa.system_health
    calc: $now - $last_collected_t
   units: seconds ago
   every: 10s
    warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
    crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
   delay: down 5m multiplier 1.5 max 1h
    info: number of seconds since the last successful data collection
      to: sysadmin

# Overall system health:
#  - 0: all components are healthy.
#  - 1: one or more components might become overloaded soon.
#  - 2: one or more components in the appliance might be degraded.
#  - 3: one or more components might be in an unusable status and the appliance might become unresponsive soon.
#  - 4: no health data is available.

template: vcsa_system_health
      on: vcsa.system_health
  lookup: max -10s unaligned of system
   units: status
   every: 10s
    warn: ($this == 1) || ($this == 2)
    crit: $this == 3
   delay: down 1m multiplier 1.5 max 1h
    info: overall system health status \
          (-1: unknown, 0: green, 1: yellow, 2: orange, 3: red, 4: grey)
      to: sysadmin

# Components health:
#  - 0: healthy.
#  - 1: healthy, but may have some problems.
#  - 2: degraded, and may have serious problems.
#  - 3: unavailable, or will stop functioning soon.
#  - 4: no health data is available.

template: vcsa_swap_health
      on: vcsa.components_health
  lookup: max -10s unaligned of swap
   units: status
   every: 10s
    warn: $this == 1
    crit: ($this == 2) || ($this == 3)
   delay: down 1m multiplier 1.5 max 1h
    info: swap health status \
          (-1: unknown, 0: green, 1: yellow, 2: orange, 3: red, 4: grey)
      to: sysadmin

template: vcsa_storage_health
      on: vcsa.components_health
  lookup: max -10s unaligned of storage
   units: status
   every: 10s
    warn: $this == 1
    crit: ($this == 2) || ($this == 3)
   delay: down 1m multiplier 1.5 max 1h
    info: storage health status \
          (-1: unknown, 0: green, 1: yellow, 2: orange, 3: red, 4: grey)
      to: sysadmin

template: vcsa_mem_health
      on: vcsa.components_health
  lookup: max -10s unaligned of mem
   units: status
   every: 10s
    warn: $this == 1
    crit: ($this == 2) || ($this == 3)
   delay: down 1m multiplier 1.5 max 1h
    info: memory health status \
          (-1: unknown, 0: green, 1: yellow, 2: orange, 3: red, 4: grey)
      to: sysadmin

template: vcsa_load_health
      on: vcsa.components_health
  lookup: max -10s unaligned of load
   units: status
   every: 10s
    warn: $this == 1
    crit: ($this == 2) || ($this == 3)
   delay: down 1m multiplier 1.5 max 1h
    info: load health status \
          (-1: unknown, 0: green, 1: yellow, 2: orange, 3: red, 4: grey)
      to: sysadmin

template: vcsa_database_storage_health
      on: vcsa.components_health
  lookup: max -10s unaligned of database_storage
   units: status
   every: 10s
    warn: $this == 1
    crit: ($this == 2) || ($this == 3)
   delay: down 1m multiplier 1.5 max 1h
    info: database storage health status \
          (-1: unknown, 0: green, 1: yellow, 2: orange, 3: red, 4: grey)
      to: sysadmin

template: vcsa_applmgmt_health
      on: vcsa.components_health
  lookup: max -10s unaligned of applmgmt
   units: status
   every: 10s
    warn: $this == 1
    crit: ($this == 2) || ($this == 3)
   delay: down 1m multiplier 1.5 max 1h
    info: applmgmt health status \
          (-1: unknown, 0: green, 1: yellow, 2: orange, 3: red, 4: grey)
      to: sysadmin


# Software updates health:
#  - 0: no updates available.
#  - 2: non-security updates are available.
#  - 3: security updates are available.
#  - 4: an error retrieving information on software updates.

template: vcsa_software_updates_health
      on: vcsa.software_updates_health
  lookup: max -10s unaligned of software_packages
   units: status
   every: 10s
    warn: $this == 4
    crit: $this == 3
   delay: down 1m multiplier 1.5 max 1h
    info: software updates availability status \
          (-1: unknown, 0: green, 2: orange, 3: red, 4: grey)
      to: sysadmin