summaryrefslogtreecommitdiffstats
path: root/src/health/health.d/vernemq.conf
diff options
context:
space:
mode:
Diffstat (limited to 'src/health/health.d/vernemq.conf')
-rw-r--r--src/health/health.d/vernemq.conf391
1 files changed, 391 insertions, 0 deletions
diff --git a/src/health/health.d/vernemq.conf b/src/health/health.d/vernemq.conf
new file mode 100644
index 000000000..6ea9f99dc
--- /dev/null
+++ b/src/health/health.d/vernemq.conf
@@ -0,0 +1,391 @@
+
+# Socket errors
+
+ template: vernemq_socket_errors
+ on: vernemq.socket_errors
+ class: Errors
+ type: Messaging
+component: VerneMQ
+ lookup: sum -1m unaligned absolute of socket_error
+ units: errors
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 2m down 5m multiplier 1.5 max 2h
+ summary: VerneMQ socket errors
+ info: Number of socket errors in the last minute
+ to: sysadmin
+
+# Queues dropped/expired/unhandled PUBLISH messages
+
+ template: vernemq_queue_message_drop
+ on: vernemq.queue_undelivered_messages
+ class: Errors
+ type: Messaging
+component: VerneMQ
+ lookup: average -1m unaligned absolute of queue_message_drop
+ units: dropped messages
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 2m down 5m multiplier 1.5 max 2h
+ summary: VerneMQ dropped messages
+ info: Number of dropped messages due to full queues in the last minute
+ to: sysadmin
+
+ template: vernemq_queue_message_expired
+ on: vernemq.queue_undelivered_messages
+ class: Latency
+ type: Messaging
+component: VerneMQ
+ lookup: average -1m unaligned absolute of queue_message_expired
+ units: expired messages
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 2m down 5m multiplier 1.5 max 2h
+ summary: VerneMQ expired messages
+ info: number of messages which expired before delivery in the last minute
+ to: sysadmin
+
+ template: vernemq_queue_message_unhandled
+ on: vernemq.queue_undelivered_messages
+ class: Latency
+ type: Messaging
+component: VerneMQ
+ lookup: average -1m unaligned absolute of queue_message_unhandled
+ units: unhandled messages
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 2m down 5m multiplier 1.5 max 2h
+ summary: VerneMQ unhandled messages
+ info: Number of unhandled messages (connections with clean session=true) in the last minute
+ to: sysadmin
+
+# Erlang VM
+
+ template: vernemq_average_scheduler_utilization
+ on: vernemq.average_scheduler_utilization
+ class: Utilization
+ type: Messaging
+component: VerneMQ
+ lookup: average -10m unaligned
+ units: %
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (75) : (85))
+ crit: $this > (($status == $CRITICAL) ? (85) : (95))
+ delay: down 15m multiplier 1.5 max 1h
+ summary: VerneMQ scheduler utilization
+ info: Average scheduler utilization over the last 10 minutes
+ to: sysadmin
+
+# Cluster communication and netsplits
+
+ template: vernemq_cluster_dropped
+ on: vernemq.cluster_dropped
+ class: Errors
+ type: Messaging
+component: VerneMQ
+ lookup: sum -1m unaligned
+ units: KiB
+ every: 1m
+ warn: $this > 0
+ delay: up 5m down 5m multiplier 1.5 max 1h
+ summary: VerneMQ dropped traffic
+ info: Amount of traffic dropped during communication with the cluster nodes in the last minute
+ to: sysadmin
+
+ template: vernemq_netsplits
+ on: vernemq.netsplits
+ class: Workload
+ type: Messaging
+component: VerneMQ
+ lookup: sum -1m unaligned absolute of netsplit_detected
+ units: netsplits
+ every: 10s
+ warn: $this > 0
+ delay: down 5m multiplier 1.5 max 2h
+ summary: VerneMQ netsplits
+ info: Number of detected netsplits (split brain situation) in the last minute
+ to: sysadmin
+
+# Unsuccessful CONNACK
+
+ template: vernemq_mqtt_connack_sent_reason_unsuccessful
+ on: vernemq.mqtt_connack_sent_reason
+ class: Errors
+ type: Messaging
+component: VerneMQ
+ lookup: average -1m unaligned absolute match-names of !success,*
+ units: packets
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 2m down 5m multiplier 1.5 max 2h
+ summary: VerneMQ unsuccessful CONNACK
+ info: Number of sent unsuccessful v3/v5 CONNACK packets in the last minute
+ to: sysadmin
+
+# Not normal DISCONNECT
+
+ template: vernemq_mqtt_disconnect_received_reason_not_normal
+ on: vernemq.mqtt_disconnect_received_reason
+ class: Workload
+ type: Messaging
+component: VerneMQ
+ lookup: average -1m unaligned absolute match-names of !normal_disconnect,*
+ units: packets
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 2m down 5m multiplier 1.5 max 2h
+ summary: VerneMQ received not normal DISCONNECT
+ info: Number of received not normal v5 DISCONNECT packets in the last minute
+ to: sysadmin
+
+ template: vernemq_mqtt_disconnect_sent_reason_not_normal
+ on: vernemq.mqtt_disconnect_sent_reason
+ class: Errors
+ type: Messaging
+component: VerneMQ
+ lookup: average -1m unaligned absolute match-names of !normal_disconnect,*
+ units: packets
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 2m down 5m multiplier 1.5 max 2h
+ summary: VerneMQ sent not normal DISCONNECT
+ info: Number of sent not normal v5 DISCONNECT packets in the last minute
+ to: sysadmin
+
+# SUBSCRIBE errors and unauthorized attempts
+
+ template: vernemq_mqtt_subscribe_error
+ on: vernemq.mqtt_subscribe_error
+ class: Errors
+ type: Messaging
+component: VerneMQ
+ lookup: average -1m unaligned absolute
+ units: failed ops
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 2m down 5m multiplier 1.5 max 2h
+ summary: VerneMQ failed SUBSCRIBE
+ info: Number of failed v3/v5 SUBSCRIBE operations in the last minute
+ to: sysadmin
+
+ template: vernemq_mqtt_subscribe_auth_error
+ on: vernemq.mqtt_subscribe_auth_error
+ class: Workload
+ type: Messaging
+component: VerneMQ
+ lookup: average -1m unaligned absolute
+ units: attempts
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 2m down 5m multiplier 1.5 max 2h
+ summary: VerneMQ unauthorized SUBSCRIBE
+ info: number of unauthorized v3/v5 SUBSCRIBE attempts in the last minute
+ to: sysadmin
+
+# UNSUBSCRIBE errors
+
+ template: vernemq_mqtt_unsubscribe_error
+ on: vernemq.mqtt_unsubscribe_error
+ class: Errors
+ type: Messaging
+component: VerneMQ
+ lookup: average -1m unaligned absolute
+ units: failed ops
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 2m down 5m multiplier 1.5 max 2h
+ summary: VerneMQ failed UNSUBSCRIBE
+ info: Number of failed v3/v5 UNSUBSCRIBE operations in the last minute
+ to: sysadmin
+
+# PUBLISH errors and unauthorized attempts
+
+ template: vernemq_mqtt_publish_errors
+ on: vernemq.mqtt_publish_errors
+ class: Errors
+ type: Messaging
+component: VerneMQ
+ lookup: average -1m unaligned absolute
+ units: failed ops
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 2m down 5m multiplier 1.5 max 2h
+ summary: VerneMQ failed PUBLISH
+ info: Number of failed v3/v5 PUBLISH operations in the last minute
+ to: sysadmin
+
+ template: vernemq_mqtt_publish_auth_errors
+ on: vernemq.mqtt_publish_auth_errors
+ class: Workload
+ type: Messaging
+component: VerneMQ
+ lookup: average -1m unaligned absolute
+ units: attempts
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 2m down 5m multiplier 1.5 max 2h
+ summary: VerneMQ unauthorized PUBLISH
+ info: Number of unauthorized v3/v5 PUBLISH attempts in the last minute
+ to: sysadmin
+
+# Unsuccessful and unexpected PUBACK
+
+ template: vernemq_mqtt_puback_received_reason_unsuccessful
+ on: vernemq.mqtt_puback_received_reason
+ class: Errors
+ type: Messaging
+component: VerneMQ
+ lookup: average -1m unaligned absolute match-names of !success,*
+ units: packets
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 2m down 5m multiplier 1.5 max 2h
+ summary: VerneMQ unsuccessful received PUBACK
+ info: Number of received unsuccessful v5 PUBACK packets in the last minute
+ to: sysadmin
+
+ template: vernemq_mqtt_puback_sent_reason_unsuccessful
+ on: vernemq.mqtt_puback_sent_reason
+ class: Errors
+ type: Messaging
+component: VerneMQ
+ lookup: average -1m unaligned absolute match-names of !success,*
+ units: packets
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 2m down 5m multiplier 1.5 max 2h
+ summary: VerneMQ unsuccessful sent PUBACK
+ info: Number of sent unsuccessful v5 PUBACK packets in the last minute
+ to: sysadmin
+
+ template: vernemq_mqtt_puback_unexpected
+ on: vernemq.mqtt_puback_invalid_error
+ class: Workload
+ type: Messaging
+component: VerneMQ
+ lookup: average -1m unaligned absolute
+ units: messages
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 2m down 5m multiplier 1.5 max 2h
+ summary: VerneMQ unnexpected recieved PUBACK
+ info: Number of received unexpected v3/v5 PUBACK packets in the last minute
+ to: sysadmin
+
+# Unsuccessful and unexpected PUBREC
+
+ template: vernemq_mqtt_pubrec_received_reason_unsuccessful
+ on: vernemq.mqtt_pubrec_received_reason
+ class: Errors
+ type: Messaging
+component: VerneMQ
+ lookup: average -1m unaligned absolute match-names of !success,*
+ units: packets
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 2m down 5m multiplier 1.5 max 2h
+ summary: VerneMQ unsuccessful received PUBREC
+ info: Number of received unsuccessful v5 PUBREC packets in the last minute
+ to: sysadmin
+
+ template: vernemq_mqtt_pubrec_sent_reason_unsuccessful
+ on: vernemq.mqtt_pubrec_sent_reason
+ class: Errors
+ type: Messaging
+component: VerneMQ
+ lookup: average -1m unaligned absolute match-names of !success,*
+ units: packets
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 2m down 5m multiplier 1.5 max 2h
+ summary: VerneMQ unsuccessful sent PUBREC
+ info: Number of sent unsuccessful v5 PUBREC packets in the last minute
+ to: sysadmin
+
+ template: vernemq_mqtt_pubrec_invalid_error
+ on: vernemq.mqtt_pubrec_invalid_error
+ class: Workload
+ type: Messaging
+component: VerneMQ
+ lookup: average -1m unaligned absolute
+ units: messages
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 2m down 5m multiplier 1.5 max 2h
+ summary: VerneMQ invalid received PUBREC
+ info: Number of received invalid v3 PUBREC packets in the last minute
+ to: sysadmin
+
+# Unsuccessful PUBREL
+
+ template: vernemq_mqtt_pubrel_received_reason_unsuccessful
+ on: vernemq.mqtt_pubrel_received_reason
+ class: Errors
+ type: Messaging
+component: VerneMQ
+ lookup: average -1m unaligned absolute match-names of !success,*
+ units: packets
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 2m down 5m multiplier 1.5 max 2h
+ summary: VerneMQ unsuccessful received PUBREL
+ info: Number of received unsuccessful v5 PUBREL packets in the last minute
+ to: sysadmin
+
+ template: vernemq_mqtt_pubrel_sent_reason_unsuccessful
+ on: vernemq.mqtt_pubrel_sent_reason
+ class: Errors
+ type: Messaging
+component: VerneMQ
+ lookup: average -1m unaligned absolute match-names of !success,*
+ units: packets
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 2m down 5m multiplier 1.5 max 2h
+ summary: VerneMQ unsuccessful sent PUBREL
+ info: number of sent unsuccessful v5 PUBREL packets in the last minute
+ to: sysadmin
+
+# Unsuccessful and unexpected PUBCOMP
+
+ template: vernemq_mqtt_pubcomp_received_reason_unsuccessful
+ on: vernemq.mqtt_pubcomp_received_reason
+ class: Errors
+ type: Messaging
+component: VerneMQ
+ lookup: average -1m unaligned absolute match-names of !success,*
+ units: packets
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 2m down 5m multiplier 1.5 max 2h
+ summary: VerneMQ unsuccessful received PUBCOMP
+ info: Number of received unsuccessful v5 PUBCOMP packets in the last minute
+ to: sysadmin
+
+ template: vernemq_mqtt_pubcomp_sent_reason_unsuccessful
+ on: vernemq.mqtt_pubcomp_sent_reason
+ class: Errors
+ type: Messaging
+component: VerneMQ
+ lookup: average -1m unaligned absolute match-names of !success,*
+ units: packets
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 2m down 5m multiplier 1.5 max 2h
+ summary: VerneMQ unsuccessful sent PUBCOMP
+ info: number of sent unsuccessful v5 PUBCOMP packets in the last minute
+ to: sysadmin
+
+ template: vernemq_mqtt_pubcomp_unexpected
+ on: vernemq.mqtt_pubcomp_invalid_error
+ class: Workload
+ type: Messaging
+component: VerneMQ
+ lookup: average -1m unaligned absolute
+ units: messages
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (0) : (5))
+ delay: up 2m down 5m multiplier 1.5 max 2h
+ summary: VerneMQ unexpected received PUBCOMP
+ info: number of received unexpected v3/v5 PUBCOMP packets in the last minute
+ to: sysadmin