diff options
Diffstat (limited to 'health/health.d/mysql.conf')
-rw-r--r-- | health/health.d/mysql.conf | 266 |
1 files changed, 151 insertions, 115 deletions
diff --git a/health/health.d/mysql.conf b/health/health.d/mysql.conf index 7451b3f4d..91860c4a7 100644 --- a/health/health.d/mysql.conf +++ b/health/health.d/mysql.conf @@ -1,150 +1,186 @@ # make sure mysql is running -template: mysql_last_collected_secs - on: mysql.queries - calc: $now - $last_collected_t - units: seconds ago - every: 10s - warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every)) - crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every)) - delay: down 5m multiplier 1.5 max 1h - info: number of seconds since the last successful data collection - to: dba + template: mysql_last_collected_secs + on: mysql.queries + class: Database +component: MySQL + type: Latency + calc: $now - $last_collected_t + units: seconds ago + every: 10s + warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every)) + crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every)) + delay: down 5m multiplier 1.5 max 1h + info: number of seconds since the last successful data collection + to: dba # ----------------------------------------------------------------------------- # slow queries -template: mysql_10s_slow_queries - on: mysql.queries - lookup: sum -10s of slow_queries - units: slow queries - every: 10s - warn: $this > (($status >= $WARNING) ? (5) : (10)) - crit: $this > (($status == $CRITICAL) ? (10) : (20)) - delay: down 5m multiplier 1.5 max 1h - info: number of slow queries in the last 10 seconds - to: dba + template: mysql_10s_slow_queries + on: mysql.queries + class: Database +component: MySQL + type: Latency + lookup: sum -10s of slow_queries + units: slow queries + every: 10s + warn: $this > (($status >= $WARNING) ? (5) : (10)) + crit: $this > (($status == $CRITICAL) ? (10) : (20)) + delay: down 5m multiplier 1.5 max 1h + info: number of slow queries in the last 10 seconds + to: dba # ----------------------------------------------------------------------------- # lock waits -template: mysql_10s_table_locks_immediate - on: mysql.table_locks - lookup: sum -10s absolute of immediate - units: immediate locks - every: 10s - info: number of table immediate locks in the last 10 seconds - to: dba - -template: mysql_10s_table_locks_waited - on: mysql.table_locks - lookup: sum -10s absolute of waited - units: waited locks - every: 10s - info: number of table waited locks in the last 10 seconds - to: dba - -template: mysql_10s_waited_locks_ratio - on: mysql.table_locks - calc: ( ($mysql_10s_table_locks_waited + $mysql_10s_table_locks_immediate) > 0 ) ? (($mysql_10s_table_locks_waited * 100) / ($mysql_10s_table_locks_waited + $mysql_10s_table_locks_immediate)) : 0 - units: % - every: 10s - warn: $this > (($status >= $WARNING) ? (10) : (25)) - crit: $this > (($status == $CRITICAL) ? (25) : (50)) - delay: down 30m multiplier 1.5 max 1h - info: ratio of waited table locks over the last 10 seconds - to: dba + template: mysql_10s_table_locks_immediate + on: mysql.table_locks + class: Database +component: MySQL + type: Utilization + lookup: sum -10s absolute of immediate + units: immediate locks + every: 10s + info: number of table immediate locks in the last 10 seconds + to: dba + + template: mysql_10s_table_locks_waited + on: mysql.table_locks + class: Database +component: MySQL + type: Latency + lookup: sum -10s absolute of waited + units: waited locks + every: 10s + info: number of table waited locks in the last 10 seconds + to: dba + + template: mysql_10s_waited_locks_ratio + on: mysql.table_locks + class: Database +component: MySQL + type: Latency + calc: ( ($mysql_10s_table_locks_waited + $mysql_10s_table_locks_immediate) > 0 ) ? (($mysql_10s_table_locks_waited * 100) / ($mysql_10s_table_locks_waited + $mysql_10s_table_locks_immediate)) : 0 + units: % + every: 10s + warn: $this > (($status >= $WARNING) ? (10) : (25)) + crit: $this > (($status == $CRITICAL) ? (25) : (50)) + delay: down 30m multiplier 1.5 max 1h + info: ratio of waited table locks over the last 10 seconds + to: dba # ----------------------------------------------------------------------------- # connections -template: mysql_connections - on: mysql.connections_active - calc: $active * 100 / $limit - units: % - every: 10s - warn: $this > (($status >= $WARNING) ? (60) : (70)) - crit: $this > (($status == $CRITICAL) ? (80) : (90)) - delay: down 15m multiplier 1.5 max 1h - info: client connections utilization - to: dba + template: mysql_connections + on: mysql.connections_active + class: Database +component: MySQL + type: Utilization + calc: $active * 100 / $limit + units: % + every: 10s + warn: $this > (($status >= $WARNING) ? (60) : (70)) + crit: $this > (($status == $CRITICAL) ? (80) : (90)) + delay: down 15m multiplier 1.5 max 1h + info: client connections utilization + to: dba # ----------------------------------------------------------------------------- # replication -template: mysql_replication - on: mysql.slave_status - calc: ($sql_running <= 0 OR $io_running <= 0)?0:1 - units: ok/failed - every: 10s - crit: $this == 0 - delay: down 5m multiplier 1.5 max 1h - info: replication status (0: stopped, 1: working) - to: dba - -template: mysql_replication_lag - on: mysql.slave_behind - calc: $seconds - units: seconds - every: 10s - warn: $this > (($status >= $WARNING) ? (5) : (10)) - crit: $this > (($status == $CRITICAL) ? (10) : (30)) - delay: down 15m multiplier 1.5 max 1h - info: difference between the timestamp of the latest transaction processed by the SQL thread and \ - the timestamp of the same transaction when it was processed on the master - to: dba + template: mysql_replication + on: mysql.slave_status + class: Database +component: MySQL + type: Errors + calc: ($sql_running <= 0 OR $io_running <= 0)?0:1 + units: ok/failed + every: 10s + crit: $this == 0 + delay: down 5m multiplier 1.5 max 1h + info: replication status (0: stopped, 1: working) + to: dba + + template: mysql_replication_lag + on: mysql.slave_behind + class: Database +component: MySQL + type: Errors + calc: $seconds + units: seconds + every: 10s + warn: $this > (($status >= $WARNING) ? (5) : (10)) + crit: $this > (($status == $CRITICAL) ? (10) : (30)) + delay: down 15m multiplier 1.5 max 1h + info: difference between the timestamp of the latest transaction processed by the SQL thread and \ + the timestamp of the same transaction when it was processed on the master + to: dba # ----------------------------------------------------------------------------- # galera cluster size -template: mysql_galera_cluster_size_max_2m - on: mysql.galera_cluster_size - lookup: max -2m absolute - units: nodes - every: 10s - info: maximum galera cluster size in the last 2 minutes - to: dba - -template: mysql_galera_cluster_size - on: mysql.galera_cluster_size - calc: $nodes - units: nodes - every: 10s - warn: $this > $mysql_galera_cluster_size_max_2m - crit: $this < $mysql_galera_cluster_size_max_2m - delay: up 20s down 5m multiplier 1.5 max 1h - info: current galera cluster size, compared to the maximum size in the last 2 minutes - to: dba + template: mysql_galera_cluster_size_max_2m + on: mysql.galera_cluster_size + class: Database +component: MySQL + type: Utilization + lookup: max -2m absolute + units: nodes + every: 10s + info: maximum galera cluster size in the last 2 minutes + to: dba + + template: mysql_galera_cluster_size + on: mysql.galera_cluster_size + class: Database +component: MySQL + type: Utilization + calc: $nodes + units: nodes + every: 10s + warn: $this > $mysql_galera_cluster_size_max_2m + crit: $this < $mysql_galera_cluster_size_max_2m + delay: up 20s down 5m multiplier 1.5 max 1h + info: current galera cluster size, compared to the maximum size in the last 2 minutes + to: dba # galera node state -template: mysql_galera_cluster_state - on: mysql.galera_cluster_state - calc: $state - every: 10s - warn: $this < 4 - crit: $this < 2 - delay: up 30s down 5m multiplier 1.5 max 1h - info: galera node state \ - (0: undefined, 1: joining, 2: donor/desynced, 3: joined, 4: synced) - to: dba + template: mysql_galera_cluster_state + on: mysql.galera_cluster_state + class: Database +component: MySQL + type: Errors + calc: $state + every: 10s + warn: $this == 2 OR $this == 3 + crit: $this == 0 OR $this == 1 OR $this >= 5 + delay: up 30s down 5m multiplier 1.5 max 1h + info: galera node state \ + (0: Undefined, 1: Joining, 2: Donor/Desynced, 3: Joined, 4: Synced, 5: Inconsistent) + to: dba # galera node status -template: mysql_galera_cluster_status - on: mysql.galera_cluster_status - calc: $wsrep_cluster_status - every: 10s - crit: $mysql_galera_cluster_state != nan AND $this != 0 - delay: up 30s down 5m multiplier 1.5 max 1h - info: galera node cluster component status \ - (-1: unknown, 0: primary/quorum present, 1: non-primary/quorum lost, 2: disconnected). \ - Any other value than primary indicates that the node is part of a nonoperational component. - to: dba + template: mysql_galera_cluster_status + on: mysql.galera_cluster_status + class: Database +component: MySQL + type: Errors + calc: $wsrep_cluster_status + every: 10s + crit: $mysql_galera_cluster_state != nan AND $this != 0 + delay: up 30s down 5m multiplier 1.5 max 1h + info: galera node cluster component status \ + (-1: unknown, 0: primary/quorum present, 1: non-primary/quorum lost, 2: disconnected). \ + Any other value than primary indicates that the node is part of a nonoperational component. + to: dba |