# path: root/health/health.d/vsphere.conf
# blob: 3e1414c16b18964873a6da66d18a16b52d52563e
# you can disable an alarm notification by setting the 'to' line to: silent

# -----------------------------------------------VM Specific------------------------------------------------------------
# Memory

# Virtual machine memory utilization alarm.
# Applies to every chart of the vsphere.vm_mem_usage_percentage context and
# evaluates the chart's `used` dimension every 20 seconds.
#   warn:  raised when the value exceeds 90; once the alarm is already in
#          WARNING (or a higher status) it stays raised until the value is
#          80 or lower.
#   crit:  raised when the value exceeds 98; once the alarm is CRITICAL it
#          stays raised until the value is 90 or lower.
#   delay: notifications for a status decrease are postponed by 15m,
#          multiplied by 1.5 on repeated changes, capped at 1h.
#   to:    explicit recipient role, consistent with the other notifying
#          vsphere templates; set it to `silent` to disable notifications.
template: vsphere_vm_mem_usage
      on: vsphere.vm_mem_usage_percentage
   hosts: *
    calc: $used
   units: %
   every: 20s
    warn: $this > (($status >= $WARNING)  ? (80) : (90))
    crit: $this > (($status == $CRITICAL) ? (90) : (98))
   delay: down 15m multiplier 1.5 max 1h
    info: virtual machine memory utilization
      to: sysadmin

# -----------------------------------------------HOST Specific----------------------------------------------------------
# Memory

# Host memory utilization alarm.
# Applies to every chart of the vsphere.host_mem_usage_percentage context and
# evaluates the chart's `used` dimension every 20 seconds.
#   warn:  raised when the value exceeds 90; once the alarm is already in
#          WARNING (or a higher status) it stays raised until the value is
#          80 or lower.
#   crit:  raised when the value exceeds 98; once the alarm is CRITICAL it
#          stays raised until the value is 90 or lower.
#   delay: notifications for a status decrease are postponed by 15m,
#          multiplied by 1.5 on repeated changes, capped at 1h.
#   to:    explicit recipient role, consistent with the other notifying
#          vsphere templates; set it to `silent` to disable notifications.
template: vsphere_host_mem_usage
      on: vsphere.host_mem_usage_percentage
   hosts: *
    calc: $used
   units: %
   every: 20s
    warn: $this > (($status >= $WARNING)  ? (80) : (90))
    crit: $this > (($status == $CRITICAL) ? (90) : (98))
   delay: down 15m multiplier 1.5 max 1h
    info: host memory utilization
      to: sysadmin

# Network errors

# Helper value: inbound network errors over the last 10 minutes.
# Sums the absolute values of the `rx` dimension (matched by name) of each
# vsphere.net_errors_total chart, refreshed every minute.
# No warn/crit thresholds are defined here, so this template never raises by
# itself; its value is read as $vsphere_inbound_packets_errors by the
# vsphere_inbound_packets_errors_ratio template.
template: vsphere_inbound_packets_errors
      on: vsphere.net_errors_total
   hosts: *
families: *
  lookup: sum -10m unaligned absolute match-names of rx
   units: packets
   every: 1m
    info: number of inbound errors for the network interface in the last 10 minutes

# Helper value: outbound network errors over the last 10 minutes.
# Sums the absolute values of the `tx` dimension (matched by name) of each
# vsphere.net_errors_total chart, refreshed every minute.
# No warn/crit thresholds are defined here, so this template never raises by
# itself; its value is read as $vsphere_outbound_packets_errors by the
# vsphere_outbound_packets_errors_ratio template.
template: vsphere_outbound_packets_errors
      on: vsphere.net_errors_total
   hosts: *
families: *
  lookup: sum -10m unaligned absolute match-names of tx
   units: packets
   every: 1m
    info: number of outbound errors for the network interface in the last 10 minutes

# Network errors ratio

# Inbound error ratio alarm (percentage of received packets that were errors).
#   lookup: $this starts as the 10-minute sum of the `rx` dimension of the
#           vsphere.net_packets_total chart.
#   calc:   when $vsphere_inbound_packets_errors is not nan and more than 1000
#           packets were seen in the window, $this becomes errors * 100 / packets;
#           otherwise it is forced to 0 so low-traffic interfaces do not alarm.
#   warn:   raised when the ratio is 2% or more (no critical level is defined).
#   delay:  notifications are postponed 1m on a status increase and 1h on a
#           decrease, multiplied by 1.5 on repeated changes, capped at 2h.
# NOTE(review): relies on $vsphere_inbound_packets_errors being resolvable from
# this chart (the value comes from the vsphere.net_errors_total template) - confirm.
template: vsphere_inbound_packets_errors_ratio
      on: vsphere.net_packets_total
   hosts: *
families: *
  lookup: sum -10m unaligned absolute match-names of rx
    calc: (($vsphere_inbound_packets_errors != nan AND $this > 1000) ? ($vsphere_inbound_packets_errors * 100 / $this) : (0))
   units: %
   every: 1m
    warn: $this >= 2
   delay: up 1m down 1h multiplier 1.5 max 2h
    info: ratio of inbound errors for the network interface over the last 10 minutes
      to: sysadmin

# Outbound error ratio alarm (percentage of sent packets that were errors).
#   lookup: $this starts as the 10-minute sum of the `tx` dimension of the
#           vsphere.net_packets_total chart.
#   calc:   when $vsphere_outbound_packets_errors is not nan and more than 1000
#           packets were seen in the window, $this becomes errors * 100 / packets;
#           otherwise it is forced to 0 so low-traffic interfaces do not alarm.
#   warn:   raised when the ratio is 2% or more (no critical level is defined).
#   delay:  notifications are postponed 1m on a status increase and 1h on a
#           decrease, multiplied by 1.5 on repeated changes, capped at 2h.
# NOTE(review): relies on $vsphere_outbound_packets_errors being resolvable from
# this chart (the value comes from the vsphere.net_errors_total template) - confirm.
template: vsphere_outbound_packets_errors_ratio
      on: vsphere.net_packets_total
   hosts: *
families: *
  lookup: sum -10m unaligned absolute match-names of tx
    calc: (($vsphere_outbound_packets_errors != nan AND $this > 1000) ? ($vsphere_outbound_packets_errors * 100 / $this) : (0))
   units: %
   every: 1m
    warn: $this >= 2
   delay: up 1m down 1h multiplier 1.5 max 2h
    info: ratio of outbound errors for the network interface over the last 10 minutes
      to: sysadmin

# -----------------------------------------------Common-------------------------------------------------------------------
# CPU

# CPU utilization alarm for charts of the vsphere.cpu_usage_total context.
#   lookup: average of the `used` dimension (matched by name) over the last
#           10 minutes, re-evaluated every 20 seconds.
#   warn:   raised when the average exceeds 85; once the alarm is already in
#           WARNING (or a higher status) it stays raised until the value is
#           75 or lower.
#   crit:   raised when the average exceeds 95; once the alarm is CRITICAL it
#           stays raised until the value is 85 or lower.
#   delay:  notifications for a status decrease are postponed by 15m,
#           multiplied by 1.5 on repeated changes, capped at 1h.
template: vsphere_cpu_usage
      on: vsphere.cpu_usage_total
   hosts: *
  lookup: average -10m unaligned match-names of used
   units: %
   every: 20s
    warn: $this > (($status >= $WARNING)  ? (75) : (85))
    crit: $this > (($status == $CRITICAL) ? (85) : (95))
   delay: down 15m multiplier 1.5 max 1h
    info: average CPU utilization
      to: sysadmin

# Network drops

# Helper value: inbound dropped packets over the last 10 minutes.
# Sums the absolute values of the `rx` dimension (matched by name) of each
# vsphere.net_drops_total chart, refreshed every minute.
# No warn/crit thresholds are defined here, so this template never raises by
# itself; its value is read as $vsphere_inbound_packets_dropped by the
# vsphere_inbound_packets_dropped_ratio template.
template: vsphere_inbound_packets_dropped
      on: vsphere.net_drops_total
   hosts: *
families: *
  lookup: sum -10m unaligned absolute match-names of rx
   units: packets
   every: 1m
    info: number of inbound dropped packets for the network interface in the last 10 minutes

# Helper value: outbound dropped packets over the last 10 minutes.
# Sums the absolute values of the `tx` dimension (matched by name) of each
# vsphere.net_drops_total chart, refreshed every minute.
# No warn/crit thresholds are defined here, so this template never raises by
# itself; its value is read as $vsphere_outbound_packets_dropped by the
# vsphere_outbound_packets_dropped_ratio template.
template: vsphere_outbound_packets_dropped
      on: vsphere.net_drops_total
   hosts: *
families: *
  lookup: sum -10m unaligned absolute match-names of tx
   units: packets
   every: 1m
    info: number of outbound dropped packets for the network interface in the last 10 minutes

# Network drops ratio

# Inbound drop ratio alarm (percentage of received packets that were dropped).
#   lookup: $this starts as the 10-minute sum of the `rx` dimension of the
#           vsphere.net_packets_total chart.
#   calc:   when $vsphere_inbound_packets_dropped is not nan and more than 1000
#           packets were seen in the window, $this becomes drops * 100 / packets;
#           otherwise it is forced to 0 so low-traffic interfaces do not alarm.
#   warn:   raised when the ratio is 2% or more (no critical level is defined).
#   delay:  notifications are postponed 1m on a status increase and 1h on a
#           decrease, multiplied by 1.5 on repeated changes, capped at 2h.
# NOTE(review): relies on $vsphere_inbound_packets_dropped being resolvable from
# this chart (the value comes from the vsphere.net_drops_total template) - confirm.
template: vsphere_inbound_packets_dropped_ratio
      on: vsphere.net_packets_total
   hosts: *
families: *
  lookup: sum -10m unaligned absolute match-names of rx
    calc: (($vsphere_inbound_packets_dropped != nan AND $this > 1000) ? ($vsphere_inbound_packets_dropped * 100 / $this) : (0))
   units: %
   every: 1m
    warn: $this >= 2
   delay: up 1m down 1h multiplier 1.5 max 2h
    info: ratio of inbound dropped packets for the network interface over the last 10 minutes
      to: sysadmin

# Outbound drop ratio alarm (percentage of sent packets that were dropped).
#   lookup: $this starts as the 10-minute sum of the `tx` dimension of the
#           vsphere.net_packets_total chart.
#   calc:   when $vsphere_outbound_packets_dropped is not nan and more than 1000
#           packets were seen in the window, $this becomes drops * 100 / packets;
#           otherwise it is forced to 0 so low-traffic interfaces do not alarm.
#   warn:   raised when the ratio is 2% or more (no critical level is defined).
#   delay:  notifications are postponed 1m on a status increase and 1h on a
#           decrease, multiplied by 1.5 on repeated changes, capped at 2h.
# NOTE(review): relies on $vsphere_outbound_packets_dropped being resolvable from
# this chart (the value comes from the vsphere.net_drops_total template) - confirm.
template: vsphere_outbound_packets_dropped_ratio
      on: vsphere.net_packets_total
   hosts: *
families: *
  lookup: sum -10m unaligned absolute match-names of tx
    calc: (($vsphere_outbound_packets_dropped != nan AND $this > 1000) ? ($vsphere_outbound_packets_dropped * 100 / $this) : (0))
   units: %
   every: 1m
    warn: $this >= 2
   delay: up 1m down 1h multiplier 1.5 max 2h
    info: ratio of outbound dropped packets for the network interface over the last 10 minutes
      to: sysadmin