diff options
Diffstat (limited to 'health/health.d/vernemq.conf')
-rw-r--r-- | health/health.d/vernemq.conf | 399 |
1 files changed, 399 insertions, 0 deletions
diff --git a/health/health.d/vernemq.conf b/health/health.d/vernemq.conf
new file mode 100644
index 00000000..36bbaf82
--- /dev/null
+++ b/health/health.d/vernemq.conf
@@ -0,0 +1,408 @@
+# Netdata health alarm templates for the VerneMQ charts (vernemq.*).
+#
+# Conventions used in this file:
+#  - A threshold written as (($status == $WARNING) ? (a) : (b)) adds hysteresis:
+#    the alarm is raised when the value exceeds b and, once raised, it stays
+#    raised until the value no longer exceeds a.
+#  - Templates that define no warn/crit line only compute a value; that value is
+#    read by name in the calc line of the matching *_unsuccessful / *_not_normal
+#    template (total minus the successful/normal share).
+
+# Availability
+
+template: vernemq_last_collected_secs
+      on: vernemq.node_uptime
+    calc: $now - $last_collected_t
+   units: seconds ago
+   every: 10s
+    warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+    crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
+   delay: down 5m multiplier 1.5 max 1h
+    info: number of seconds since the last successful data collection
+      to: sysadmin
+
+# Socket errors
+
+template: vernemq_socket_errors
+      on: vernemq.socket_errors
+  lookup: sum -1m unaligned absolute of socket_error
+   units: errors
+   every: 10s
+    warn: $this > (($status == $WARNING) ? (0) : (5))
+   delay: down 5m multiplier 1.5 max 2h
+    info: socket errors in the last minute
+      to: sysadmin
+
+# Queues dropped/expired/unhandled PUBLISH messages
+
+template: vernemq_queue_message_drop
+      on: vernemq.queue_undelivered_messages
+  lookup: sum -1m unaligned absolute of queue_message_drop
+   units: dropped messages
+   every: 10s
+    warn: $this > (($status == $WARNING) ? (0) : (5))
+   delay: down 5m multiplier 1.5 max 2h
+    info: dropped messages due to full queues in the last minute
+      to: sysadmin
+
+template: vernemq_queue_message_expired
+      on: vernemq.queue_undelivered_messages
+  lookup: sum -1m unaligned absolute of queue_message_expired
+   units: expired messages
+   every: 10s
+    warn: $this > (($status == $WARNING) ? (0) : (15))
+   delay: down 5m multiplier 1.5 max 2h
+    info: messages which expired before delivery in the last minute
+      to: sysadmin
+
+template: vernemq_queue_message_unhandled
+      on: vernemq.queue_undelivered_messages
+  lookup: sum -1m unaligned absolute of queue_message_unhandled
+   units: unhandled messages
+   every: 10s
+    warn: $this > (($status == $WARNING) ? (0) : (5))
+   delay: down 5m multiplier 1.5 max 2h
+    info: unhandled messages (connections with clean session=true) in the last minute
+      to: sysadmin
+
+# Erlang VM
+
+template: vernemq_average_scheduler_utilization
+      on: vernemq.average_scheduler_utilization
+  lookup: average -10m unaligned
+   units: %
+   every: 1m
+    warn: $this > (($status >= $WARNING) ? (75) : (85))
+    crit: $this > (($status == $CRITICAL) ? (85) : (95))
+   delay: down 15m multiplier 1.5 max 1h
+    info: average scheduler utilization for the last 10 minutes
+      to: sysadmin
+
+# Cluster communication and netsplits
+
+template: vernemq_cluster_dropped
+      on: vernemq.cluster_dropped
+  lookup: average -1m unaligned
+   units: KiB/s
+   every: 10s
+    warn: $this > 0
+   delay: down 5m multiplier 1.5 max 1h
+    info: the amount of traffic dropped during communication with the cluster nodes in the last minute
+      to: sysadmin
+
+template: vernemq_netsplits
+      on: vernemq.netsplits
+  lookup: sum -1m unaligned absolute of netsplit_detected
+   units: netsplits
+   every: 10s
+    warn: $this > 0
+   delay: down 5m multiplier 1.5 max 2h
+    info: detected netsplits in the last minute
+      to: sysadmin
+
+# Unsuccessful CONNACK
+
+template: vernemq_mqtt_connack_sent_reason_success
+      on: vernemq.mqtt_connack_sent_reason
+  lookup: sum -1m unaligned absolute match-names of success
+   units: packets
+   every: 10s
+    info: successful v3/v5 CONNACK sent in the last minute
+      to: sysadmin
+
+template: vernemq_mqtt_connack_sent_reason_unsuccessful
+      on: vernemq.mqtt_connack_sent_reason
+  lookup: sum -1m unaligned absolute
+    calc: $this - $vernemq_mqtt_connack_sent_reason_success
+   units: packets
+   every: 10s
+    warn: $this > 0
+   delay: down 5m multiplier 1.5 max 2h
+    info: unsuccessful v3/v5 CONNACK sent in the last minute
+      to: sysadmin
+
+# Not normal DISCONNECT
+
+template: vernemq_mqtt_disconnect_received_reason_normal_disconnect
+      on: vernemq.mqtt_disconnect_received_reason
+  lookup: sum -1m unaligned absolute match-names of normal_disconnect
+   units: packets
+   every: 10s
+    info: normal v5 DISCONNECT received in the last minute
+      to: sysadmin
+
+template: vernemq_mqtt_disconnect_sent_reason_normal_disconnect
+      on: vernemq.mqtt_disconnect_sent_reason
+  lookup: sum -1m unaligned absolute match-names of normal_disconnect
+   units: packets
+   every: 10s
+    info: normal v5 DISCONNECT sent in the last minute
+      to: sysadmin
+
+template: vernemq_mqtt_disconnect_received_reason_not_normal
+      on: vernemq.mqtt_disconnect_received_reason
+  lookup: sum -1m unaligned absolute
+    calc: $this - $vernemq_mqtt_disconnect_received_reason_normal_disconnect
+   units: packets
+   every: 10s
+    warn: $this > 0
+   delay: down 5m multiplier 1.5 max 2h
+    info: not normal v5 DISCONNECT received in the last minute
+      to: sysadmin
+
+template: vernemq_mqtt_disconnect_sent_reason_not_normal
+      on: vernemq.mqtt_disconnect_sent_reason
+  lookup: sum -1m unaligned absolute
+    calc: $this - $vernemq_mqtt_disconnect_sent_reason_normal_disconnect
+   units: packets
+   every: 10s
+    warn: $this > 0
+   delay: down 5m multiplier 1.5 max 2h
+    info: not normal v5 DISCONNECT sent in the last minute
+      to: sysadmin
+
+# SUBSCRIBE errors and unauthorized attempts
+
+template: vernemq_mqtt_subscribe_error
+      on: vernemq.mqtt_subscribe_error
+  lookup: sum -1m unaligned absolute
+   units: failed ops
+   every: 10s
+    warn: $this > 0
+   delay: down 5m multiplier 1.5 max 2h
+    info: failed v3/v5 SUBSCRIBE operations in the last minute
+      to: sysadmin
+
+template: vernemq_mqtt_subscribe_auth_error
+      on: vernemq.mqtt_subscribe_auth_error
+  lookup: sum -1m unaligned absolute
+   units: attempts
+   every: 10s
+    warn: $this > 0
+   delay: down 5m multiplier 1.5 max 2h
+    info: unauthorized v3/v5 SUBSCRIBE attempts in the last minute
+      to: sysadmin
+
+# UNSUBSCRIBE errors
+
+template: vernemq_mqtt_unsubscribe_error
+      on: vernemq.mqtt_unsubscribe_error
+  lookup: sum -1m unaligned absolute
+   units: failed ops
+   every: 10s
+    warn: $this > 0
+   delay: down 5m multiplier 1.5 max 2h
+    info: failed v3/v5 UNSUBSCRIBE operations in the last minute
+      to: sysadmin
+
+# PUBLISH errors and unauthorized attempts
+
+template: vernemq_mqtt_publish_errors
+      on: vernemq.mqtt_publish_errors
+  lookup: sum -1m unaligned absolute
+   units: failed ops
+   every: 10s
+    warn: $this > 0
+   delay: down 5m multiplier 1.5 max 2h
+    info: failed v3/v5 PUBLISH operations in the last minute
+      to: sysadmin
+
+template: vernemq_mqtt_publish_auth_errors
+      on: vernemq.mqtt_publish_auth_errors
+  lookup: sum -1m unaligned absolute
+   units: attempts
+   every: 10s
+    warn: $this > 0
+   delay: down 5m multiplier 1.5 max 2h
+    info: unauthorized v3/v5 PUBLISH attempts in the last minute
+      to: sysadmin
+
+# Unsuccessful and unexpected PUBACK
+
+template: vernemq_mqtt_puback_received_reason_success
+      on: vernemq.mqtt_puback_received_reason
+  lookup: sum -1m unaligned absolute match-names of success
+   units: packets
+   every: 10s
+    info: successful v5 PUBACK received in the last minute
+      to: sysadmin
+
+template: vernemq_mqtt_puback_sent_reason_success
+      on: vernemq.mqtt_puback_sent_reason
+  lookup: sum -1m unaligned absolute match-names of success
+   units: packets
+   every: 10s
+    info: successful v5 PUBACK sent in the last minute
+      to: sysadmin
+
+template: vernemq_mqtt_puback_received_reason_unsuccessful
+      on: vernemq.mqtt_puback_received_reason
+  lookup: sum -1m unaligned absolute
+    calc: $this - $vernemq_mqtt_puback_received_reason_success
+   units: packets
+   every: 10s
+    warn: $this > 0
+   delay: down 5m multiplier 1.5 max 2h
+    info: unsuccessful v5 PUBACK received in the last minute
+      to: sysadmin
+
+template: vernemq_mqtt_puback_sent_reason_unsuccessful
+      on: vernemq.mqtt_puback_sent_reason
+  lookup: sum -1m unaligned absolute
+    calc: $this - $vernemq_mqtt_puback_sent_reason_success
+   units: packets
+   every: 10s
+    warn: $this > 0
+   delay: down 5m multiplier 1.5 max 2h
+    info: unsuccessful v5 PUBACK sent in the last minute
+      to: sysadmin
+
+template: vernemq_mqtt_puback_unexpected
+      on: vernemq.mqtt_puback_invalid_error
+  lookup: sum -1m unaligned absolute
+   units: messages
+   every: 10s
+    warn: $this > 0
+   delay: down 5m multiplier 1.5 max 2h
+    info: unexpected v3/v5 PUBACK received in the last minute
+      to: sysadmin
+
+# Unsuccessful and unexpected PUBREC
+
+template: vernemq_mqtt_pubrec_received_reason_success
+      on: vernemq.mqtt_pubrec_received_reason
+  lookup: sum -1m unaligned absolute match-names of success
+   units: packets
+   every: 10s
+    info: successful v5 PUBREC received in the last minute
+      to: sysadmin
+
+template: vernemq_mqtt_pubrec_sent_reason_success
+      on: vernemq.mqtt_pubrec_sent_reason
+  lookup: sum -1m unaligned absolute match-names of success
+   units: packets
+   every: 10s
+    info: successful v5 PUBREC sent in the last minute
+      to: sysadmin
+
+template: vernemq_mqtt_pubrec_received_reason_unsuccessful
+      on: vernemq.mqtt_pubrec_received_reason
+  lookup: sum -1m unaligned absolute
+    calc: $this - $vernemq_mqtt_pubrec_received_reason_success
+   units: packets
+   every: 10s
+    warn: $this > 0
+   delay: down 5m multiplier 1.5 max 2h
+    info: unsuccessful v5 PUBREC received in the last minute
+      to: sysadmin
+
+template: vernemq_mqtt_pubrec_sent_reason_unsuccessful
+      on: vernemq.mqtt_pubrec_sent_reason
+  lookup: sum -1m unaligned absolute
+    calc: $this - $vernemq_mqtt_pubrec_sent_reason_success
+   units: packets
+   every: 10s
+    warn: $this > 0
+   delay: down 5m multiplier 1.5 max 2h
+    info: unsuccessful v5 PUBREC sent in the last minute
+      to: sysadmin
+
+template: vernemq_mqtt_pubrec_invalid_error
+      on: vernemq.mqtt_pubrec_invalid_error
+  lookup: sum -1m unaligned absolute
+   units: messages
+   every: 10s
+    warn: $this > 0
+   delay: down 5m multiplier 1.5 max 2h
+    info: unexpected v3 PUBREC received in the last minute
+      to: sysadmin
+
+# Unsuccessful PUBREL
+
+template: vernemq_mqtt_pubrel_received_reason_success
+      on: vernemq.mqtt_pubrel_received_reason
+  lookup: sum -1m unaligned absolute match-names of success
+   units: packets
+   every: 10s
+    info: successful v5 PUBREL received in the last minute
+      to: sysadmin
+
+template: vernemq_mqtt_pubrel_sent_reason_success
+      on: vernemq.mqtt_pubrel_sent_reason
+  lookup: sum -1m unaligned absolute match-names of success
+   units: packets
+   every: 10s
+    info: successful v5 PUBREL sent in the last minute
+      to: sysadmin
+
+template: vernemq_mqtt_pubrel_received_reason_unsuccessful
+      on: vernemq.mqtt_pubrel_received_reason
+  lookup: sum -1m unaligned absolute
+    calc: $this - $vernemq_mqtt_pubrel_received_reason_success
+   units: packets
+   every: 10s
+    warn: $this > 0
+   delay: down 5m multiplier 1.5 max 2h
+    info: unsuccessful v5 PUBREL received in the last minute
+      to: sysadmin
+
+template: vernemq_mqtt_pubrel_sent_reason_unsuccessful
+      on: vernemq.mqtt_pubrel_sent_reason
+  lookup: sum -1m unaligned absolute
+    calc: $this - $vernemq_mqtt_pubrel_sent_reason_success
+   units: packets
+   every: 10s
+    warn: $this > 0
+   delay: down 5m multiplier 1.5 max 2h
+    info: unsuccessful v5 PUBREL sent in the last minute
+      to: sysadmin
+
+# Unsuccessful and unexpected PUBCOMP
+
+template: vernemq_mqtt_pubcomp_received_reason_success
+      on: vernemq.mqtt_pubcomp_received_reason
+  lookup: sum -1m unaligned absolute match-names of success
+   units: packets
+   every: 10s
+    info: successful v5 PUBCOMP received in the last minute
+      to: sysadmin
+
+template: vernemq_mqtt_pubcomp_sent_reason_success
+      on: vernemq.mqtt_pubcomp_sent_reason
+  lookup: sum -1m unaligned absolute match-names of success
+   units: packets
+   every: 10s
+    info: successful v5 PUBCOMP sent in the last minute
+      to: sysadmin
+
+template: vernemq_mqtt_pubcomp_received_reason_unsuccessful
+      on: vernemq.mqtt_pubcomp_received_reason
+  lookup: sum -1m unaligned absolute
+    calc: $this - $vernemq_mqtt_pubcomp_received_reason_success
+   units: packets
+   every: 10s
+    warn: $this > 0
+   delay: down 5m multiplier 1.5 max 2h
+    info: unsuccessful v5 PUBCOMP received in the last minute
+      to: sysadmin
+
+template: vernemq_mqtt_pubcomp_sent_reason_unsuccessful
+      on: vernemq.mqtt_pubcomp_sent_reason
+  lookup: sum -1m unaligned absolute
+    calc: $this - $vernemq_mqtt_pubcomp_sent_reason_success
+   units: packets
+   every: 10s
+    warn: $this > 0
+   delay: down 5m multiplier 1.5 max 2h
+    info: unsuccessful v5 PUBCOMP sent in the last minute
+      to: sysadmin
+
+template: vernemq_mqtt_pubcomp_unexpected
+      on: vernemq.mqtt_pubcomp_invalid_error
+  lookup: sum -1m unaligned absolute
+   units: messages
+   every: 10s
+    warn: $this > 0
+   delay: down 5m multiplier 1.5 max 2h
+    info: unexpected v3/v5 PUBCOMP received in the last minute
+      to: sysadmin