summaryrefslogtreecommitdiffstats
path: root/health/health.d/vernemq.conf
diff options
context:
space:
mode:
Diffstat (limited to 'health/health.d/vernemq.conf')
-rw-r--r--health/health.d/vernemq.conf319
1 files changed, 110 insertions, 209 deletions
diff --git a/health/health.d/vernemq.conf b/health/health.d/vernemq.conf
index 36bbaf82..9598dd39 100644
--- a/health/health.d/vernemq.conf
+++ b/health/health.d/vernemq.conf
@@ -18,10 +18,10 @@ template: vernemq_socket_errors
on: vernemq.socket_errors
lookup: sum -1m unaligned absolute of socket_error
units: errors
- every: 10s
- warn: $this > (($status == $WARNING) ? (0) : (5))
- delay: down 5m multiplier 1.5 max 2h
- info: socket errors in the last minute
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 2m down 5m multiplier 1.5 max 2h
+ info: number of socket errors in the last minute
to: sysadmin
# Queues dropped/expired/unhandled PUBLISH messages
@@ -30,30 +30,30 @@ template: vernemq_queue_message_drop
on: vernemq.queue_undelivered_messages
lookup: sum -1m unaligned absolute of queue_message_drop
units: dropped messages
- every: 10s
- warn: $this > (($status == $WARNING) ? (0) : (5))
- delay: down 5m multiplier 1.5 max 2h
- info: dropped messaged due to full queues in the last minute
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 5m down 5m multiplier 1.5 max 2h
+ info: number of dropped messaged due to full queues in the last minute
to: sysadmin
template: vernemq_queue_message_expired
on: vernemq.queue_undelivered_messages
lookup: sum -1m unaligned absolute of queue_message_expired
units: expired messages
- every: 10s
- warn: $this > (($status == $WARNING) ? (0) : (15))
- delay: down 5m multiplier 1.5 max 2h
- info: messages which expired before delivery in the last minute
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (15))
+ delay: up 5m down 5m multiplier 1.5 max 2h
+ info: number of messages which expired before delivery in the last minute
to: sysadmin
template: vernemq_queue_message_unhandled
on: vernemq.queue_undelivered_messages
lookup: sum -1m unaligned absolute of queue_message_unhandled
units: unhandled messages
- every: 10s
- warn: $this > (($status == $WARNING) ? (0) : (5))
- delay: down 5m multiplier 1.5 max 2h
- info: unhandled messages (connections with clean session=true) in the last minute
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 5m down 5m multiplier 1.5 max 2h
+ info: number of unhandled messages (connections with clean session=true) in the last minute
to: sysadmin
# Erlang VM
@@ -66,19 +66,19 @@ template: vernemq_average_scheduler_utilization
warn: $this > (($status >= $WARNING) ? (75) : (85))
crit: $this > (($status == $CRITICAL) ? (85) : (95))
delay: down 15m multiplier 1.5 max 1h
- info: average scheduler utilization for the last 10 minutes
+ info: average scheduler utilization over the last 10 minutes
to: sysadmin
# Cluster communication and netsplits
template: vernemq_cluster_dropped
on: vernemq.cluster_dropped
- lookup: average -1m unaligned
- units: KiB/s
- every: 10s
+ lookup: sum -1m unaligned
+ units: KiB
+ every: 1m
warn: $this > 0
- delay: down 5m multiplier 1.5 max 1h
- info: the amount of traffic dropped during communication with the cluster nodes in the last minute
+ delay: up 5m down 5m multiplier 1.5 max 1h
+ info: amount of traffic dropped during communication with the cluster nodes in the last minute
to: sysadmin
template: vernemq_netsplits
@@ -88,68 +88,41 @@ template: vernemq_netsplits
every: 10s
warn: $this > 0
delay: down 5m multiplier 1.5 max 2h
- info: detected netsplits in the last minute
+ info: number of detected netsplits (split brain situation) in the last minute
to: sysadmin
# Unsuccessful CONNACK
-template: vernemq_mqtt_connack_sent_reason_success
- on: vernemq.mqtt_connack_sent_reason
- lookup: sum -1m unaligned absolute match-names of success
- units: packets
- every: 10s
- info: successful v3/v5 CONNACK sent in the last minute
- to: sysadmin
-
template: vernemq_mqtt_connack_sent_reason_unsuccessful
on: vernemq.mqtt_connack_sent_reason
- lookup: sum -1m unaligned absolute
- calc: $this - $vernemq_mqtt_connack_sent_reason_success
+ lookup: sum -1m unaligned absolute match-names of !success,*
units: packets
- every: 10s
- warn: $this > 0
- delay: down 5m multiplier 1.5 max 2h
- info: unsuccessful v3/v5 CONNACK sent in the last minute
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 5m down 5m multiplier 1.5 max 2h
+ info: number of sent unsuccessful v3/v5 CONNACK packets in the last minute
to: sysadmin
# Not normal DISCONNECT
-template: vernemq_mqtt_disconnect_received_reason_normal_disconnect
- on: vernemq.mqtt_disconnect_received_reason
- lookup: sum -1m unaligned absolute match-names of normal_disconnect
- units: packets
- every: 10s
- info: normal v5 DISCONNECT received in the last minute
- to: sysadmin
-
-template: vernemq_mqtt_disconnect_sent_reason_normal_disconnect
- on: vernemq.mqtt_disconnect_sent_reason
- lookup: sum -1m unaligned absolute match-names of normal_disconnect
- units: packets
- every: 10s
- info: normal v5 DISCONNECT sent in the last minute
- to: sysadmin
-
template: vernemq_mqtt_disconnect_received_reason_not_normal
on: vernemq.mqtt_disconnect_received_reason
- lookup: sum -1m unaligned absolute
- calc: $this - $vernemq_mqtt_disconnect_received_reason_normal_disconnect
+ lookup: sum -1m unaligned absolute match-names of !normal_disconnect,*
units: packets
- every: 10s
- warn: $this > 0
- delay: down 5m multiplier 1.5 max 2h
- info: not normal v5 DISCONNECT received in the last minute
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 5m down 5m multiplier 1.5 max 2h
+ info: number of received not normal v5 DISCONNECT packets in the last minute
to: sysadmin
template: vernemq_mqtt_disconnect_sent_reason_not_normal
on: vernemq.mqtt_disconnect_sent_reason
- lookup: sum -1m unaligned absolute
- calc: $this - $vernemq_mqtt_disconnect_sent_reason_normal_disconnect
+ lookup: sum -1m unaligned absolute match-names of !normal_disconnect,*
units: packets
- every: 10s
- warn: $this > 0
- delay: down 5m multiplier 1.5 max 2h
- info: not normal v5 DISCONNECT sent in the last minute
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 5m down 5m multiplier 1.5 max 2h
+ info: number of sent not normal v5 DISCONNECT packets in the last minute
to: sysadmin
# SUBSCRIBE errors and unauthorized attempts
@@ -158,20 +131,20 @@ template: vernemq_mqtt_subscribe_error
on: vernemq.mqtt_subscribe_error
lookup: sum -1m unaligned absolute
units: failed ops
- every: 10s
- warn: $this > 0
- delay: down 5m multiplier 1.5 max 2h
- info: failed v3/v5 SUBSCRIBE operations in the last minute
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 5m down 5m multiplier 1.5 max 2h
+ info: number of failed v3/v5 SUBSCRIBE operations in the last minute
to: sysadmin
template: vernemq_mqtt_subscribe_auth_error
on: vernemq.mqtt_subscribe_auth_error
lookup: sum -1m unaligned absolute
units: attempts
- every: 10s
- warn: $this > 0
- delay: down 5m multiplier 1.5 max 2h
- info: unauthorized v3/v5 SUBSCRIBE attempts in the last minute
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 5m down 5m multiplier 1.5 max 2h
+ info: number of unauthorized v3/v5 SUBSCRIBE attempts in the last minute
to: sysadmin
# UNSUBSCRIBE errors
@@ -180,10 +153,10 @@ template: vernemq_mqtt_unsubscribe_error
on: vernemq.mqtt_unsubscribe_error
lookup: sum -1m unaligned absolute
units: failed ops
- every: 10s
- warn: $this > 0
- delay: down 5m multiplier 1.5 max 2h
- info: failed v3/v5 UNSUBSCRIBE operations in the last minute
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 5m down 5m multiplier 1.5 max 2h
+ info: number of failed v3/v5 UNSUBSCRIBE operations in the last minute
to: sysadmin
# PUBLISH errors and unauthorized attempts
@@ -192,208 +165,136 @@ template: vernemq_mqtt_publish_errors
on: vernemq.mqtt_publish_errors
lookup: sum -1m unaligned absolute
units: failed ops
- every: 10s
- warn: $this > 0
- delay: down 5m multiplier 1.5 max 2h
- info: failed v3/v5 PUBLISH operations in the last minute
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 5m down 5m multiplier 1.5 max 2h
+ info: number of failed v3/v5 PUBLISH operations in the last minute
to: sysadmin
template: vernemq_mqtt_publish_auth_errors
on: vernemq.mqtt_publish_auth_errors
lookup: sum -1m unaligned absolute
units: attempts
- every: 10s
- warn: $this > 0
- delay: down 5m multiplier 1.5 max 2h
- info: unauthorized v3/v5 PUBLISH attempts in the last minute
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 5m down 5m multiplier 1.5 max 2h
+ info: number of unauthorized v3/v5 PUBLISH attempts in the last minute
to: sysadmin
# Unsuccessful and unexpected PUBACK
-template: vernemq_mqtt_puback_received_reason_success
- on: vernemq.mqtt_puback_received_reason
- lookup: sum -1m unaligned absolute match-names of success
- units: packets
- every: 10s
- info: successful v5 PUBACK received in the last minute
- to: sysadmin
-
-template: vernemq_mqtt_puback_sent_reason_success
- on: vernemq.mqtt_puback_sent_reason
- lookup: sum -1m unaligned absolute match-names of success
- units: packets
- every: 10s
- info: successful v5 PUBACK sent in the last minute
- to: sysadmin
-
template: vernemq_mqtt_puback_received_reason_unsuccessful
on: vernemq.mqtt_puback_received_reason
- lookup: sum -1m unaligned absolute
- calc: $this - $vernemq_mqtt_puback_received_reason_success
+ lookup: sum -1m unaligned absolute match-names of !success,*
units: packets
- every: 10s
- warn: $this > 0
- delay: down 5m multiplier 1.5 max 2h
- info: unsuccessful v5 PUBACK received in the last minute
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 5m down 5m multiplier 1.5 max 2h
+ info: number of received unsuccessful v5 PUBACK packets in the last minute
to: sysadmin
template: vernemq_mqtt_puback_sent_reason_unsuccessful
on: vernemq.mqtt_puback_sent_reason
- lookup: sum -1m unaligned absolute
- calc: $this - $vernemq_mqtt_puback_sent_reason_success
+ lookup: sum -1m unaligned absolute match-names of !success,*
units: packets
- every: 10s
- warn: $this > 0
- delay: down 5m multiplier 1.5 max 2h
- info: unsuccessful v5 PUBACK sent in the last minute
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 5m down 5m multiplier 1.5 max 2h
+ info: number of sent unsuccessful v5 PUBACK packets in the last minute
to: sysadmin
template: vernemq_mqtt_puback_unexpected
on: vernemq.mqtt_puback_invalid_error
lookup: sum -1m unaligned absolute
units: messages
- every: 10s
- warn: $this > 0
- delay: down 5m multiplier 1.5 max 2h
- info: unexpected v3/v5 PUBACK received in the last minute
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 5m down 5m multiplier 1.5 max 2h
+ info: number of received unexpected v3/v5 PUBACK packets in the last minute
to: sysadmin
# Unsuccessful and unexpected PUBREC
-template: vernemq_mqtt_pubrec_received_reason_success
- on: vernemq.mqtt_pubrec_received_reason
- lookup: sum -1m unaligned absolute match-names of success
- units: packets
- every: 10s
- info: successful v5 PUBREC received in the last minute
- to: sysadmin
-
-template: vernemq_mqtt_pubrec_sent_reason_success
- on: vernemq.mqtt_pubrec_sent_reason
- lookup: sum -1m unaligned absolute match-names of success
- units: packets
- every: 10s
- info: successful v5 PUBREC sent in the last minute
- to: sysadmin
-
template: vernemq_mqtt_pubrec_received_reason_unsuccessful
on: vernemq.mqtt_pubrec_received_reason
- lookup: sum -1m unaligned absolute
- calc: $this - $vernemq_mqtt_pubrec_received_reason_success
+ lookup: sum -1m unaligned absolute match-names of !success,*
units: packets
- every: 10s
- warn: $this > 0
- delay: down 5m multiplier 1.5 max 2h
- info: unsuccessful v5 PUBREC received in the last minute
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 5m down 5m multiplier 1.5 max 2h
+ info: number of received unsuccessful v5 PUBREC packets in the last minute
to: sysadmin
template: vernemq_mqtt_pubrec_sent_reason_unsuccessful
on: vernemq.mqtt_pubrec_sent_reason
- lookup: sum -1m unaligned absolute
- calc: $this - $vernemq_mqtt_pubrec_sent_reason_success
+ lookup: sum -1m unaligned absolute match-names of !success,*
units: packets
- every: 10s
- warn: $this > 0
- delay: down 5m multiplier 1.5 max 2h
- info: unsuccessful v5 PUBREC sent in the last minute
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 5m down 5m multiplier 1.5 max 2h
+ info: number of sent unsuccessful v5 PUBREC packets in the last minute
to: sysadmin
template: vernemq_mqtt_pubrec_invalid_error
on: vernemq.mqtt_pubrec_invalid_error
lookup: sum -1m unaligned absolute
units: messages
- every: 10s
- warn: $this > 0
- delay: down 5m multiplier 1.5 max 2h
- info: unexpected v3 PUBREC received in the last minute
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 5m down 5m multiplier 1.5 max 2h
+ info: number of received unexpected v3 PUBREC packets in the last minute
to: sysadmin
# Unsuccessful PUBREL
-template: vernemq_mqtt_pubrel_received_reason_success
- on: vernemq.mqtt_pubrel_received_reason
- lookup: sum -1m unaligned absolute match-names of success
- units: packets
- every: 10s
- info: successful v5 PUBREL received in the last minute
- to: sysadmin
-
-template: vernemq_mqtt_pubrel_sent_reason_success
- on: vernemq.mqtt_pubrel_sent_reason
- lookup: sum -1m unaligned absolute match-names of success
- units: packets
- every: 10s
- info: successful v5 PUBREL sent in the last minute
- to: sysadmin
-
template: vernemq_mqtt_pubrel_received_reason_unsuccessful
on: vernemq.mqtt_pubrel_received_reason
- lookup: sum -1m unaligned absolute
- calc: $this - $vernemq_mqtt_pubrel_received_reason_success
+ lookup: sum -1m unaligned absolute match-names of !success,*
units: packets
- every: 10s
- warn: $this > 0
- delay: down 5m multiplier 1.5 max 2h
- info: unsuccessful v5 PUBREL received in the last minute
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 5m down 5m multiplier 1.5 max 2h
+ info: number of received unsuccessful v5 PUBREL packets in the last minute
to: sysadmin
template: vernemq_mqtt_pubrel_sent_reason_unsuccessful
on: vernemq.mqtt_pubrel_sent_reason
- lookup: sum -1m unaligned absolute
- calc: $this - $vernemq_mqtt_pubrel_sent_reason_success
+ lookup: sum -1m unaligned absolute match-names of !success,*
units: packets
- every: 10s
- warn: $this > 0
- delay: down 5m multiplier 1.5 max 2h
- info: unsuccessful v5 PUBREL sent in the last minute
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 5m down 5m multiplier 1.5 max 2h
+ info: number of sent unsuccessful v5 PUBREL packets in the last minute
to: sysadmin
# Unsuccessful and unexpected PUBCOMP
-template: vernemq_mqtt_pubcomp_received_reason_success
- on: vernemq.mqtt_pubcomp_received_reason
- lookup: sum -1m unaligned absolute match-names of success
- units: packets
- every: 10s
- info: successful v5 PUBCOMP received in the last minute
- to: sysadmin
-
-template: vernemq_mqtt_pubcomp_sent_reason_success
- on: vernemq.mqtt_pubcomp_sent_reason
- lookup: sum -1m unaligned absolute match-names of success
- units: packets
- every: 10s
- info: successful v5 PUBCOMP sent in the last minute
- to: sysadmin
-
template: vernemq_mqtt_pubcomp_received_reason_unsuccessful
on: vernemq.mqtt_pubcomp_received_reason
- lookup: sum -1m unaligned absolute
- calc: $this - $vernemq_mqtt_pubcomp_received_reason_success
+ lookup: sum -1m unaligned absolute match-names of !success,*
units: packets
- every: 10s
- warn: $this > 0
- delay: down 5m multiplier 1.5 max 2h
- info: unsuccessful v5 PUBCOMP received in the last minute
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 5m down 5m multiplier 1.5 max 2h
+ info: number of received unsuccessful v5 PUBCOMP packets in the last minute
to: sysadmin
template: vernemq_mqtt_pubcomp_sent_reason_unsuccessful
on: vernemq.mqtt_pubcomp_sent_reason
- lookup: sum -1m unaligned absolute
- calc: $this - $vernemq_mqtt_pubcomp_sent_reason_success
+ lookup: sum -1m unaligned absolute match-names of !success,*
units: packets
- every: 10s
- warn: $this > 0
- delay: down 5m multiplier 1.5 max 2h
- info: unsuccessful v5 PUBCOMP sent in the last minute
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 5m down 5m multiplier 1.5 max 2h
+ info: number of sent unsuccessful v5 PUBCOMP packets in the last minute
to: sysadmin
template: vernemq_mqtt_pubcomp_unexpected
on: vernemq.mqtt_pubcomp_invalid_error
lookup: sum -1m unaligned absolute
units: messages
- every: 10s
- warn: $this > 0
- delay: down 5m multiplier 1.5 max 2h
- info: unexpected v3/v5 PUBCOMP received in the last minute
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 5m down 5m multiplier 1.5 max 2h
+ info: number of received unexpected v3/v5 PUBCOMP packets in the last minute
to: sysadmin