# path: health/health.d/wmi.conf
# blob: 6bd4e077f79ac1f74a47fb09c88b0dada0afbe70
# you can disable an alarm notification by setting the 'to' line to: silent

## Availability

 # Data-collection staleness check attached to cpu.collector_duration.
 # The value is $now - $last_collected_t, i.e. seconds since the last
 # successful collection, re-evaluated every 10s.
 # Raise thresholds: warning above 5 * $update_every, critical above
 # 60 * $update_every. Once the alarm is already in that state, the comparison
 # drops to 1 * $update_every, so it only clears when the value is back at or
 # below one update interval.
 # NOTE(review): unlike the other templates in this file, this one sets no
 # 'os:' / 'hosts:' filter - confirm that is intentional.
 template: wmi_last_collected_secs
       on: cpu.collector_duration
    class: Windows
component: Availability
     type: Latency
     calc: $now - $last_collected_t
    units: seconds ago
    every: 10s
     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
    delay: down 5m multiplier 1.5 max 1h
     info: number of seconds since the last successful data collection
       to: sysadmin

## CPU

 # CPU utilization alarm on wmi.cpu_utilization_total.
 # The value is the 10-minute average of the dpc, user, privileged and
 # interrupt dimensions, re-evaluated every minute.
 # Hysteresis: warning is raised above 85% and, once raised, stays until the
 # value is no longer above 75%; critical is raised above 95% and stays until
 # the value is no longer above 85%.
 template: wmi_10min_cpu_usage
       on: wmi.cpu_utilization_total
    class: Windows
component: CPU
     type: Utilization
       os: linux
    hosts: *
   lookup: average -10m unaligned match-names of dpc,user,privileged,interrupt
    units: %
    every: 1m
     warn: $this > (($status >= $WARNING)  ? (75) : (85))
     crit: $this > (($status == $CRITICAL) ? (85) : (95))
    delay: down 15m multiplier 1.5 max 1h
     info: average CPU utilization over the last 10 minutes
       to: sysadmin


## Memory

 # RAM utilization alarm on wmi.memory_utilization.
 # The value is $used as a percentage of ($used + $available), re-evaluated
 # every 10s.
 # Hysteresis: warning is raised above 90% and, once raised, stays until the
 # value is no longer above 80%; critical is raised above 98% and stays until
 # the value is no longer above 90%.
 template: wmi_ram_in_use
       on: wmi.memory_utilization
    class: Windows
component: Memory
     type: Utilization
       os: linux
    hosts: *
     calc: ($used) * 100 / ($used + $available)
    units: %
    every: 10s
     warn: $this > (($status >= $WARNING)  ? (80) : (90))
     crit: $this > (($status == $CRITICAL) ? (90) : (98))
    delay: down 15m multiplier 1.5 max 1h
     info: memory utilization
       to: sysadmin

 # Swap utilization alarm on wmi.memory_swap_utilization.
 # The value is $used as a percentage of ($used + $available), re-evaluated
 # every 10s. When ($used + $available) is 0 (no swap configured) the
 # calculation yields 0 instead of dividing by zero.
 # Hysteresis: warning is raised above 90% and, once raised, stays until the
 # value is no longer above 80%; critical is raised above 98% and stays until
 # the value is no longer above 90%.
 template: wmi_swap_in_use
       on: wmi.memory_swap_utilization
    class: Windows
component: Memory
     type: Utilization
       os: linux
    hosts: *
     calc: (($used + $available) > 0) ? (($used) * 100 / ($used + $available)) : 0
    units: %
    every: 10s
     warn: $this > (($status >= $WARNING)  ? (80) : (90))
     crit: $this > (($status == $CRITICAL) ? (90) : (98))
    delay: down 15m multiplier 1.5 max 1h
     info: swap memory utilization
       to: sysadmin


## Network

 # Inbound discarded-packets alarm on wmi.net_discarded.
 # The value is the sum of the 'inbound' dimension over the last 10 minutes
 # (lookup uses the 'absolute' option), re-evaluated every minute.
 # Warning only: raised when the sum is >= 5; no critical threshold is defined.
 template: wmi_inbound_packets_discarded
       on: wmi.net_discarded
    class: Windows
component: Network
     type: Errors
       os: linux
    hosts: *
 families: *
   lookup: sum -10m unaligned absolute match-names of inbound
    units: packets
    every: 1m
     warn: $this >= 5
    delay: down 1h multiplier 1.5 max 2h
     info: number of inbound discarded packets for the network interface in the last 10 minutes
       to: sysadmin

 # Outbound discarded-packets alarm on wmi.net_discarded.
 # The value is the sum of the 'outbound' dimension over the last 10 minutes
 # (lookup uses the 'absolute' option), re-evaluated every minute.
 # Warning only: raised when the sum is >= 5; no critical threshold is defined.
 template: wmi_outbound_packets_discarded
       on: wmi.net_discarded
    class: Windows
component: Network
     type: Errors
       os: linux
    hosts: *
 families: *
   lookup: sum -10m unaligned absolute match-names of outbound
    units: packets
    every: 1m
     warn: $this >= 5
    delay: down 1h multiplier 1.5 max 2h
     info: number of outbound discarded packets for the network interface in the last 10 minutes
       to: sysadmin

 # Inbound packet-errors alarm on wmi.net_errors.
 # The value is the sum of the 'inbound' dimension over the last 10 minutes
 # (lookup uses the 'absolute' option), re-evaluated every minute.
 # Warning only: raised when the sum is >= 5; no critical threshold is defined.
 template: wmi_inbound_packets_errors
       on: wmi.net_errors
    class: Windows
component: Network
     type: Errors
       os: linux
    hosts: *
 families: *
   lookup: sum -10m unaligned absolute match-names of inbound
    units: packets
    every: 1m
     warn: $this >= 5
    delay: down 1h multiplier 1.5 max 2h
     info: number of inbound errors for the network interface in the last 10 minutes
       to: sysadmin

 # Outbound packet-errors alarm on wmi.net_errors.
 # The value is the sum of the 'outbound' dimension over the last 10 minutes
 # (lookup uses the 'absolute' option), re-evaluated every minute.
 # Warning only: raised when the sum is >= 5; no critical threshold is defined.
 template: wmi_outbound_packets_errors
       on: wmi.net_errors
    class: Windows
component: Network
     type: Errors
       os: linux
    hosts: *
 families: *
   lookup: sum -10m unaligned absolute match-names of outbound
    units: packets
    every: 1m
     warn: $this >= 5
    delay: down 1h multiplier 1.5 max 2h
     info: number of outbound errors for the network interface in the last 10 minutes
       to: sysadmin


## Disk

 # Disk space utilization alarm on wmi.logical_disk_utilization.
 # The value is $used as a percentage of ($used + $free), re-evaluated every
 # 10s.
 # Hysteresis: warning is raised above 90% and, once raised, stays until the
 # value is no longer above 80%; critical is raised above 98% and stays until
 # the value is no longer above 90%.
 template: wmi_disk_in_use
       on: wmi.logical_disk_utilization
    class: Windows
component: Disk
     type: Utilization
       os: linux
    hosts: *
     calc: ($used) * 100 / ($used + $free)
    units: %
    every: 10s
     warn: $this > (($status >= $WARNING)  ? (80) : (90))
     crit: $this > (($status == $CRITICAL) ? (90) : (98))
    delay: down 15m multiplier 1.5 max 1h
     info: disk space utilization
       to: sysadmin