# path: root/health/health.d/mysql.conf
# blob: 7451b3f4d60227683d5f161b97fde84d3ae44190
#
# make sure mysql is running

# Staleness check on the mysql.queries chart: the value is the number of
# seconds between now and the chart's last collection ($now - $last_collected_t).
# From a clear state it warns when the value exceeds 5 collection intervals and
# goes critical when it exceeds 60. While the alarm is already in that state
# the threshold drops to a single interval ($update_every), so it does not
# clear until collection is current again.
template: mysql_last_collected_secs
      on: mysql.queries
    calc: $now - $last_collected_t
   units: seconds ago
   every: 10s
    warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
    crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
   delay: down 5m multiplier 1.5 max 1h
    info: number of seconds since the last successful data collection
      to: dba


# -----------------------------------------------------------------------------
# slow queries

# Sums the slow_queries dimension of mysql.queries over the last 10 seconds.
# The lookup uses `absolute`, like the other lookups in this file, so the sum
# stays positive even if a collector charts slow_queries with a negative
# multiplier; without it the `$this > N` thresholds below could never be
# exceeded. For a non-negative dimension `absolute` changes nothing.
# NOTE(review): the sign of slow_queries depends on the collector - confirm.
# Warning is raised above 10 and held until the value is back to 5 or less;
# critical is raised above 20 and held until the value is back to 10 or less.
template: mysql_10s_slow_queries
      on: mysql.queries
  lookup: sum -10s absolute of slow_queries
   units: slow queries
   every: 10s
    warn: $this > (($status >= $WARNING)  ? (5)  : (10))
    crit: $this > (($status == $CRITICAL) ? (10) : (20))
   delay: down 5m multiplier 1.5 max 1h
    info: number of slow queries in the last 10 seconds
      to: dba


# -----------------------------------------------------------------------------
# lock waits

# Value-only template (no warn/crit lines): absolute sum of the `immediate`
# dimension of mysql.table_locks over the last 10 seconds.
# The result is read as $mysql_10s_table_locks_immediate by the calc of
# mysql_10s_waited_locks_ratio.
template: mysql_10s_table_locks_immediate
      on: mysql.table_locks
  lookup: sum -10s absolute of immediate
   units: immediate locks
   every: 10s
    info: number of table immediate locks in the last 10 seconds
      to: dba

# Value-only template (no warn/crit lines): absolute sum of the `waited`
# dimension of mysql.table_locks over the last 10 seconds.
# The result is read as $mysql_10s_table_locks_waited by the calc of
# mysql_10s_waited_locks_ratio.
template: mysql_10s_table_locks_waited
      on: mysql.table_locks
  lookup: sum -10s absolute of waited
   units: waited locks
   every: 10s
    info: number of table waited locks in the last 10 seconds
      to: dba

# Percentage of table locks that had to wait in the last 10 seconds:
#   waited * 100 / (waited + immediate)
# using the values of the mysql_10s_table_locks_waited and
# mysql_10s_table_locks_immediate templates. When both are zero the result is
# forced to 0, which avoids dividing by zero.
# Warning is raised above 25% and held until the ratio is back to 10% or less;
# critical is raised above 50% and held until it is back to 25% or less.
template: mysql_10s_waited_locks_ratio
      on: mysql.table_locks
    calc: ( ($mysql_10s_table_locks_waited + $mysql_10s_table_locks_immediate) > 0 ) ? (($mysql_10s_table_locks_waited * 100) / ($mysql_10s_table_locks_waited + $mysql_10s_table_locks_immediate)) : 0
   units: %
   every: 10s
    warn: $this > (($status >= $WARNING)  ? (10) : (25))
    crit: $this > (($status == $CRITICAL) ? (25) : (50))
   delay: down 30m multiplier 1.5 max 1h
    info: ratio of waited table locks over the last 10 seconds
      to: dba


# -----------------------------------------------------------------------------
# connections

# Connection utilization: the `active` dimension of mysql.connections_active
# as a percentage of the `limit` dimension.
# Warning is raised above 70% and held until utilization is back to 60% or
# less; critical is raised above 90% and held until it is back to 80% or less.
template: mysql_connections
      on: mysql.connections_active
    calc: $active * 100 / $limit
   units: %
   every: 10s
    warn: $this > (($status >= $WARNING)  ? (60) : (70))
    crit: $this > (($status == $CRITICAL) ? (80) : (90))
   delay: down 15m multiplier 1.5 max 1h
    info: client connections utilization
      to: dba


# -----------------------------------------------------------------------------
# replication

# Replication health flag built from the sql_running and io_running dimensions
# of mysql.slave_status: the value is 0 when either one is <= 0, otherwise 1.
# There is no warning level; the alarm is critical whenever the value is 0.
template: mysql_replication
      on: mysql.slave_status
    calc: ($sql_running <= 0 OR $io_running <= 0)?0:1
   units: ok/failed
   every: 10s
    crit: $this == 0
   delay: down 5m multiplier 1.5 max 1h
    info: replication status (0: stopped, 1: working)
      to: dba

# Replication lag: takes the `seconds` dimension of mysql.slave_behind as is.
# Warning is raised above 10 seconds and held until the lag is back to 5
# seconds or less; critical is raised above 30 seconds and held until it is
# back to 10 seconds or less.
# The info text spans two physical lines joined by the trailing backslash.
template: mysql_replication_lag
      on: mysql.slave_behind
    calc: $seconds
   units: seconds
   every: 10s
    warn: $this > (($status >= $WARNING)  ? (5)  : (10))
    crit: $this > (($status == $CRITICAL) ? (10) : (30))
   delay: down 15m multiplier 1.5 max 1h
    info: difference between the timestamp of the latest transaction processed by the SQL thread and \
          the timestamp of the same transaction when it was processed on the master
      to: dba


# -----------------------------------------------------------------------------
# galera cluster size

# Value-only template (no warn/crit lines): maximum of the
# mysql.galera_cluster_size chart over the last 2 minutes.
# The result is the reference value that the mysql_galera_cluster_size alarm
# compares the current cluster size against.
template: mysql_galera_cluster_size_max_2m
      on: mysql.galera_cluster_size
  lookup: max -2m absolute
   units: nodes
   every: 10s
    info: maximum galera cluster size in the last 2 minutes
      to: dba

# Compares the current cluster size (the `nodes` dimension of
# mysql.galera_cluster_size) with $mysql_galera_cluster_size_max_2m, the
# maximum seen in the last 2 minutes.
# Warning: the current size is above that maximum.
# Critical: the current size is below that maximum, i.e. the cluster is
# smaller than it recently was.
template: mysql_galera_cluster_size
      on: mysql.galera_cluster_size
    calc: $nodes
   units: nodes
   every: 10s
    warn: $this > $mysql_galera_cluster_size_max_2m
    crit: $this < $mysql_galera_cluster_size_max_2m
   delay: up 20s down 5m multiplier 1.5 max 1h
    info: current galera cluster size, compared to the maximum size in the last 2 minutes
      to: dba

# galera node state

# Galera node state, taken from the `state` dimension of
# mysql.galera_cluster_state. Using the numbering given in the info text:
# warning for any state below 4 (not synced), critical for any state below 2
# (undefined or joining).
# NOTE(review): values above 4 raise neither level - confirm that the
# collector never reports such states.
template: mysql_galera_cluster_state
      on: mysql.galera_cluster_state
    calc: $state
   every: 10s
    warn: $this < 4
    crit: $this < 2
   delay: up 30s down 5m multiplier 1.5 max 1h
    info: galera node state \
          (0: undefined, 1: joining, 2: donor/desynced, 3: joined, 4: synced)
      to: dba


# galera node status

# Galera cluster component status, taken from the `wsrep_cluster_status`
# dimension of mysql.galera_cluster_status.
# Critical when the value is anything other than 0 (primary, per the info
# text), but only if $mysql_galera_cluster_state is not nan.
# NOTE(review): the nan guard presumably keeps the alarm quiet on instances
# where the galera state alarm has no value - confirm.
template: mysql_galera_cluster_status
      on: mysql.galera_cluster_status
    calc: $wsrep_cluster_status
   every: 10s
    crit: $mysql_galera_cluster_state != nan AND $this != 0
   delay: up 30s down 5m multiplier 1.5 max 1h
    info: galera node cluster component status \
          (-1: unknown, 0: primary/quorum present, 1: non-primary/quorum lost, 2: disconnected). \
          Any other value than primary indicates that the node is part of a nonoperational component.
      to: dba