summaryrefslogtreecommitdiffstats
path: root/health/health.d/mysql.conf
diff options
context:
space:
mode:
Diffstat (limited to 'health/health.d/mysql.conf')
-rw-r--r--health/health.d/mysql.conf266
1 files changed, 151 insertions, 115 deletions
diff --git a/health/health.d/mysql.conf b/health/health.d/mysql.conf
index 7451b3f4..91860c4a 100644
--- a/health/health.d/mysql.conf
+++ b/health/health.d/mysql.conf
@@ -1,150 +1,186 @@
# make sure mysql is running
-template: mysql_last_collected_secs
- on: mysql.queries
- calc: $now - $last_collected_t
- units: seconds ago
- every: 10s
- warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
- crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
- delay: down 5m multiplier 1.5 max 1h
- info: number of seconds since the last successful data collection
- to: dba
+ template: mysql_last_collected_secs
+ on: mysql.queries
+ class: Database
+component: MySQL
+ type: Latency
+ calc: $now - $last_collected_t
+ units: seconds ago
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
+ delay: down 5m multiplier 1.5 max 1h
+ info: number of seconds since the last successful data collection
+ to: dba
# -----------------------------------------------------------------------------
# slow queries
-template: mysql_10s_slow_queries
- on: mysql.queries
- lookup: sum -10s of slow_queries
- units: slow queries
- every: 10s
- warn: $this > (($status >= $WARNING) ? (5) : (10))
- crit: $this > (($status == $CRITICAL) ? (10) : (20))
- delay: down 5m multiplier 1.5 max 1h
- info: number of slow queries in the last 10 seconds
- to: dba
+ template: mysql_10s_slow_queries
+ on: mysql.queries
+ class: Database
+component: MySQL
+ type: Latency
+ lookup: sum -10s of slow_queries
+ units: slow queries
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? (5) : (10))
+ crit: $this > (($status == $CRITICAL) ? (10) : (20))
+ delay: down 5m multiplier 1.5 max 1h
+ info: number of slow queries in the last 10 seconds
+ to: dba
# -----------------------------------------------------------------------------
# lock waits
-template: mysql_10s_table_locks_immediate
- on: mysql.table_locks
- lookup: sum -10s absolute of immediate
- units: immediate locks
- every: 10s
- info: number of table immediate locks in the last 10 seconds
- to: dba
-
-template: mysql_10s_table_locks_waited
- on: mysql.table_locks
- lookup: sum -10s absolute of waited
- units: waited locks
- every: 10s
- info: number of table waited locks in the last 10 seconds
- to: dba
-
-template: mysql_10s_waited_locks_ratio
- on: mysql.table_locks
- calc: ( ($mysql_10s_table_locks_waited + $mysql_10s_table_locks_immediate) > 0 ) ? (($mysql_10s_table_locks_waited * 100) / ($mysql_10s_table_locks_waited + $mysql_10s_table_locks_immediate)) : 0
- units: %
- every: 10s
- warn: $this > (($status >= $WARNING) ? (10) : (25))
- crit: $this > (($status == $CRITICAL) ? (25) : (50))
- delay: down 30m multiplier 1.5 max 1h
- info: ratio of waited table locks over the last 10 seconds
- to: dba
+ template: mysql_10s_table_locks_immediate
+ on: mysql.table_locks
+ class: Database
+component: MySQL
+ type: Utilization
+ lookup: sum -10s absolute of immediate
+ units: immediate locks
+ every: 10s
+ info: number of table immediate locks in the last 10 seconds
+ to: dba
+
+ template: mysql_10s_table_locks_waited
+ on: mysql.table_locks
+ class: Database
+component: MySQL
+ type: Latency
+ lookup: sum -10s absolute of waited
+ units: waited locks
+ every: 10s
+ info: number of table waited locks in the last 10 seconds
+ to: dba
+
+ template: mysql_10s_waited_locks_ratio
+ on: mysql.table_locks
+ class: Database
+component: MySQL
+ type: Latency
+ calc: ( ($mysql_10s_table_locks_waited + $mysql_10s_table_locks_immediate) > 0 ) ? (($mysql_10s_table_locks_waited * 100) / ($mysql_10s_table_locks_waited + $mysql_10s_table_locks_immediate)) : 0
+ units: %
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? (10) : (25))
+ crit: $this > (($status == $CRITICAL) ? (25) : (50))
+ delay: down 30m multiplier 1.5 max 1h
+ info: ratio of waited table locks over the last 10 seconds
+ to: dba
# -----------------------------------------------------------------------------
# connections
-template: mysql_connections
- on: mysql.connections_active
- calc: $active * 100 / $limit
- units: %
- every: 10s
- warn: $this > (($status >= $WARNING) ? (60) : (70))
- crit: $this > (($status == $CRITICAL) ? (80) : (90))
- delay: down 15m multiplier 1.5 max 1h
- info: client connections utilization
- to: dba
+ template: mysql_connections
+ on: mysql.connections_active
+ class: Database
+component: MySQL
+ type: Utilization
+ calc: $active * 100 / $limit
+ units: %
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? (60) : (70))
+ crit: $this > (($status == $CRITICAL) ? (80) : (90))
+ delay: down 15m multiplier 1.5 max 1h
+ info: client connections utilization
+ to: dba
# -----------------------------------------------------------------------------
# replication
-template: mysql_replication
- on: mysql.slave_status
- calc: ($sql_running <= 0 OR $io_running <= 0)?0:1
- units: ok/failed
- every: 10s
- crit: $this == 0
- delay: down 5m multiplier 1.5 max 1h
- info: replication status (0: stopped, 1: working)
- to: dba
-
-template: mysql_replication_lag
- on: mysql.slave_behind
- calc: $seconds
- units: seconds
- every: 10s
- warn: $this > (($status >= $WARNING) ? (5) : (10))
- crit: $this > (($status == $CRITICAL) ? (10) : (30))
- delay: down 15m multiplier 1.5 max 1h
- info: difference between the timestamp of the latest transaction processed by the SQL thread and \
- the timestamp of the same transaction when it was processed on the master
- to: dba
+ template: mysql_replication
+ on: mysql.slave_status
+ class: Database
+component: MySQL
+ type: Errors
+ calc: ($sql_running <= 0 OR $io_running <= 0)?0:1
+ units: ok/failed
+ every: 10s
+ crit: $this == 0
+ delay: down 5m multiplier 1.5 max 1h
+ info: replication status (0: stopped, 1: working)
+ to: dba
+
+ template: mysql_replication_lag
+ on: mysql.slave_behind
+ class: Database
+component: MySQL
+ type: Errors
+ calc: $seconds
+ units: seconds
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? (5) : (10))
+ crit: $this > (($status == $CRITICAL) ? (10) : (30))
+ delay: down 15m multiplier 1.5 max 1h
+ info: difference between the timestamp of the latest transaction processed by the SQL thread and \
+ the timestamp of the same transaction when it was processed on the master
+ to: dba
# -----------------------------------------------------------------------------
# galera cluster size
-template: mysql_galera_cluster_size_max_2m
- on: mysql.galera_cluster_size
- lookup: max -2m absolute
- units: nodes
- every: 10s
- info: maximum galera cluster size in the last 2 minutes
- to: dba
-
-template: mysql_galera_cluster_size
- on: mysql.galera_cluster_size
- calc: $nodes
- units: nodes
- every: 10s
- warn: $this > $mysql_galera_cluster_size_max_2m
- crit: $this < $mysql_galera_cluster_size_max_2m
- delay: up 20s down 5m multiplier 1.5 max 1h
- info: current galera cluster size, compared to the maximum size in the last 2 minutes
- to: dba
+ template: mysql_galera_cluster_size_max_2m
+ on: mysql.galera_cluster_size
+ class: Database
+component: MySQL
+ type: Utilization
+ lookup: max -2m absolute
+ units: nodes
+ every: 10s
+ info: maximum galera cluster size in the last 2 minutes
+ to: dba
+
+ template: mysql_galera_cluster_size
+ on: mysql.galera_cluster_size
+ class: Database
+component: MySQL
+ type: Utilization
+ calc: $nodes
+ units: nodes
+ every: 10s
+ warn: $this > $mysql_galera_cluster_size_max_2m
+ crit: $this < $mysql_galera_cluster_size_max_2m
+ delay: up 20s down 5m multiplier 1.5 max 1h
+ info: current galera cluster size, compared to the maximum size in the last 2 minutes
+ to: dba
# galera node state
-template: mysql_galera_cluster_state
- on: mysql.galera_cluster_state
- calc: $state
- every: 10s
- warn: $this < 4
- crit: $this < 2
- delay: up 30s down 5m multiplier 1.5 max 1h
- info: galera node state \
- (0: undefined, 1: joining, 2: donor/desynced, 3: joined, 4: synced)
- to: dba
+ template: mysql_galera_cluster_state
+ on: mysql.galera_cluster_state
+ class: Database
+component: MySQL
+ type: Errors
+ calc: $state
+ every: 10s
+ warn: $this == 2 OR $this == 3
+ crit: $this == 0 OR $this == 1 OR $this >= 5
+ delay: up 30s down 5m multiplier 1.5 max 1h
+ info: galera node state \
+ (0: Undefined, 1: Joining, 2: Donor/Desynced, 3: Joined, 4: Synced, 5: Inconsistent)
+ to: dba
# galera node status
-template: mysql_galera_cluster_status
- on: mysql.galera_cluster_status
- calc: $wsrep_cluster_status
- every: 10s
- crit: $mysql_galera_cluster_state != nan AND $this != 0
- delay: up 30s down 5m multiplier 1.5 max 1h
- info: galera node cluster component status \
- (-1: unknown, 0: primary/quorum present, 1: non-primary/quorum lost, 2: disconnected). \
- Any other value than primary indicates that the node is part of a nonoperational component.
- to: dba
+ template: mysql_galera_cluster_status
+ on: mysql.galera_cluster_status
+ class: Database
+component: MySQL
+ type: Errors
+ calc: $wsrep_cluster_status
+ every: 10s
+ crit: $mysql_galera_cluster_state != nan AND $this != 0
+ delay: up 30s down 5m multiplier 1.5 max 1h
+ info: galera node cluster component status \
+ (-1: unknown, 0: primary/quorum present, 1: non-primary/quorum lost, 2: disconnected). \
+ Any other value than primary indicates that the node is part of a nonoperational component.
+ to: dba