blob: e24f71830908dd12cb9bddb4bd04de75c4992d1e (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
|
# you can disable an alarm notification by setting the 'to' line to: silent
# Restart detection: evaluates the uptime dimension of clickhouse.uptime every
# 10 seconds and raises WARNING while uptime is between 1 and 180 seconds,
# i.e. during the first three minutes after the server came up.
# Notifications are silenced ('to: silent'); the alert is still evaluated.
template: clickhouse_restarted
on: clickhouse.uptime
class: Errors
type: Database
component: ClickHouse
calc: $uptime
units: seconds
every: 10s
warn: $this > 1 AND $this < 180
summary: ClickHouse restart detected
info: ClickHouse has recently been restarted
to: silent
# Preempted queries: every 10 seconds takes the maximum of
# clickhouse.queries_preempted over the last minute and raises WARNING when it
# is above zero. Recovery notifications are delayed by 5 minutes (multiplier
# 1.5, capped at 1 hour). Notifications go to the 'dba' recipient.
template: clickhouse_queries_preempted
on: clickhouse.queries_preempted
class: Workload
type: Database
component: ClickHouse
lookup: max -1m unaligned
units: preempted_queries
every: 10s
warn: $this > 0
delay: down 5m multiplier 1.5 max 1h
summary: ClickHouse preempted queries detected
info: ClickHouse has queries that are stopped and waiting due to priority setting
to: dba
# Long-running query: every 10 seconds takes the maximum of
# clickhouse.longest_running_query_time over the last minute.
# The warn expression uses hysteresis: the alert is raised when the value
# exceeds 600 seconds and, once in WARNING, stays raised until the value is
# no longer above 300 seconds. This avoids flapping around a single threshold.
# Recovery notifications are delayed by 5 minutes (multiplier 1.5, capped at
# 1 hour). Notifications go to the 'dba' recipient.
template: clickhouse_long_running_query
on: clickhouse.longest_running_query_time
class: Latency
type: Database
component: ClickHouse
lookup: max -1m unaligned
units: seconds
every: 10s
warn: $this > (($status >= $WARNING) ? (300) : (600))
delay: down 5m multiplier 1.5 max 1h
summary: ClickHouse long-running query detected
info: ClickHouse has a long-running query exceeding the threshold
to: dba
# Rejected INSERTs: every 10 seconds sums clickhouse.rejected_inserts over the
# last minute and raises WARNING when the sum is above zero.
# Recovery notifications are delayed by 5 minutes (multiplier 1.5, capped at
# 1 hour). Notifications go to the 'dba' recipient.
template: clickhouse_rejected_inserts
on: clickhouse.rejected_inserts
class: Workload
type: Database
component: ClickHouse
lookup: sum -1m unaligned
units: rejected_inserts
every: 10s
warn: $this > 0
delay: down 5m multiplier 1.5 max 1h
summary: ClickHouse rejected INSERT queries detected
info: ClickHouse has INSERT queries that are rejected due to high number of active data parts for partition in a MergeTree
to: dba
# Delayed (throttled) INSERTs: every 10 seconds sums
# clickhouse.delayed_inserts over the last minute and raises WARNING when the
# sum is above zero. Recovery notifications are delayed by 5 minutes
# (multiplier 1.5, capped at 1 hour).
# Notifications are silenced ('to: silent'); the alert is still evaluated.
template: clickhouse_delayed_inserts
on: clickhouse.delayed_inserts
class: Workload
type: Database
component: ClickHouse
lookup: sum -1m unaligned
units: delayed_inserts
every: 10s
warn: $this > 0
delay: down 5m multiplier 1.5 max 1h
summary: ClickHouse delayed INSERT queries detected
info: ClickHouse has INSERT queries that are throttled due to high number of active data parts for partition in a MergeTree
to: silent
# Replication lag: every 10 seconds averages
# clickhouse.replicas_max_absolute_delay over the last minute.
# The warn expression uses hysteresis: the alert is raised when the average
# exceeds 300 seconds (the "5 minutes" in the info text) and, once in WARNING,
# stays raised until the average is no longer above 250 seconds.
# Recovery notifications are delayed by 5 minutes (multiplier 1.5, capped at
# 1 hour). Notifications go to the 'dba' recipient.
template: clickhouse_replication_lag
on: clickhouse.replicas_max_absolute_delay
class: Workload
type: Database
component: ClickHouse
lookup: avg -1m unaligned
units: seconds
every: 10s
warn: $this > (($status >= $WARNING) ? (250) : (300))
delay: down 5m multiplier 1.5 max 1h
summary: ClickHouse high replication lag detected
info: ClickHouse is experiencing replication lag greater than 5 minutes
to: dba
# Read-only replicated tables: every 10 seconds takes the maximum of
# clickhouse.replicated_readonly_tables over the last minute and raises
# WARNING when it is above zero. Recovery notifications are delayed by
# 5 minutes (multiplier 1.5, capped at 1 hour).
# Notifications go to the 'dba' recipient.
template: clickhouse_replicated_readonly_tables
on: clickhouse.replicated_readonly_tables
class: Errors
type: Database
component: ClickHouse
lookup: max -1m unaligned
units: readonly_tables
every: 10s
warn: $this > 0
delay: down 5m multiplier 1.5 max 1h
summary: ClickHouse replicated tables in readonly state detected
info: ClickHouse has replicated tables in readonly state due to ZooKeeper session loss/startup without ZooKeeper configured
to: dba
# Parts per partition: every 10 seconds averages
# clickhouse.max_part_count_for_partition over the last minute.
# The warn expression uses hysteresis: the alert is raised when the average
# exceeds 300 parts and, once in WARNING, stays raised until the average is
# no longer above 200 parts.
# Recovery notifications are delayed by 5 minutes (multiplier 1.5, capped at
# 1 hour). Notifications go to the 'dba' recipient.
template: clickhouse_max_part_count_for_partition
on: clickhouse.max_part_count_for_partition
class: Workload
type: Database
component: ClickHouse
lookup: avg -1m unaligned
units: parts
every: 10s
warn: $this > (($status >= $WARNING) ? (200) : (300))
delay: down 5m multiplier 1.5 max 1h
summary: ClickHouse high parts/partition detected
info: ClickHouse high number of parts per partition
to: dba
# Distributed connection failures: every 10 seconds sums
# clickhouse.distributed_connections_fail_exhausted_retries over the last
# minute and raises WARNING when the sum is above zero.
# Recovery notifications are delayed by 5 minutes (multiplier 1.5, capped at
# 1 hour). Notifications go to the 'dba' recipient.
template: clickhouse_distributed_connections_failures
on: clickhouse.distributed_connections_fail_exhausted_retries
class: Errors
type: Database
component: ClickHouse
lookup: sum -1m unaligned
units: failures
every: 10s
warn: $this > 0
delay: down 5m multiplier 1.5 max 1h
summary: ClickHouse distributed connections failures detected
info: ClickHouse has failed distributed connections after exhausting all retry attempts
to: dba
# Pending Distributed-table files: every 10 seconds takes the maximum of
# clickhouse.distributed_files_to_insert over the last minute.
# The warn expression uses hysteresis: the alert is raised when the value
# exceeds 80 files and, once in WARNING, stays raised until the value is no
# longer above 40 files.
# Recovery notifications are delayed by 5 minutes (multiplier 1.5, capped at
# 1 hour).
# Notifications are silenced ('to: silent'); the alert is still evaluated.
template: clickhouse_distributed_files_to_insert
on: clickhouse.distributed_files_to_insert
class: Workload
type: Database
component: ClickHouse
lookup: max -1m unaligned
units: files
every: 10s
warn: $this > (($status >= $WARNING) ? (40) : (80))
delay: down 5m multiplier 1.5 max 1h
summary: ClickHouse high files to insert detected
info: ClickHouse high number of pending files to process for asynchronous insertion into Distributed tables
to: silent
|