summaryrefslogtreecommitdiffstats
path: root/src/health/health.d/clickhouse.conf
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/health/health.d/clickhouse.conf140
1 files changed, 140 insertions, 0 deletions
diff --git a/src/health/health.d/clickhouse.conf b/src/health/health.d/clickhouse.conf
new file mode 100644
index 000000000..e24f71830
--- /dev/null
+++ b/src/health/health.d/clickhouse.conf
@@ -0,0 +1,140 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
+
+ template: clickhouse_restarted
+ on: clickhouse.uptime
+ class: Error
+ type: Database
+component: ClickHouse
+ calc: $uptime
+ units: seconds
+ every: 10s
+ warn: $this > 1 AND $this < 180
+ summary: ClickHouse restart detected
+ info: ClickHouse has recently been restarted
+ to: silent
+
+ template: clickhouse_queries_preempted
+ on: clickhouse.queries_preempted
+ class: Workload
+ type: Database
+component: ClickHouse
+ lookup: max -1m unaligned
+ units: preempted_queries
+ every: 10s
+ warn: $this > 0
+ delay: down 5m multiplier 1.5 max 1h
+ summary: ClickHouse preempted queries detected
+ info: ClickHouse has queries that are stopped and waiting due to priority setting
+ to: dba
+
+ template: clickhouse_long_running_query
+ on: clickhouse.longest_running_query_time
+ class: Latency
+ type: Database
+component: ClickHouse
+ lookup: max -1m unaligned
+ units: seconds
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? (300) : (600))
+ delay: down 5m multiplier 1.5 max 1h
+ summary: ClickHouse long-running query detected
+ info: ClickHouse has a long-running query exceeding the threshold
+ to: dba
+
+ template: clickhouse_rejected_inserts
+ on: clickhouse.rejected_inserts
+ class: Workload
+ type: Database
+component: ClickHouse
+ lookup: sum -1m unaligned
+ units: rejected_inserts
+ every: 10s
+ warn: $this > 0
+ delay: down 5m multiplier 1.5 max 1h
+ summary: ClickHouse rejected INSERT queries detected
+ info: ClickHouse has INSERT queries that are rejected due to high number of active data parts for partition in a MergeTree
+ to: dba
+
+ template: clickhouse_delayed_inserts
+ on: clickhouse.delayed_inserts
+ class: Workload
+ type: Database
+component: ClickHouse
+ lookup: sum -1m unaligned
+ units: delayed_inserts
+ every: 10s
+ warn: $this > 0
+ delay: down 5m multiplier 1.5 max 1h
+ summary: ClickHouse delayed INSERT queries detected
+ info: ClickHouse has INSERT queries that are throttled due to high number of active data parts for partition in a MergeTree
+ to: silent
+
+ template: clickhouse_replication_lag
+ on: clickhouse.replicas_max_absolute_delay
+ class: Workload
+ type: Database
+component: ClickHouse
+ lookup: avg -1m unaligned
+ units: seconds
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? (250) : (300))
+ delay: down 5m multiplier 1.5 max 1h
+ summary: ClickHouse high replication lag detected
+ info: ClickHouse is experiencing replication lag greater than 5 minutes
+ to: dba
+
+ template: clickhouse_replicated_readonly_tables
+ on: clickhouse.replicated_readonly_tables
+ class: Error
+ type: Database
+component: ClickHouse
+ lookup: max -1m unaligned
+ units: readonly_tables
+ every: 10s
+ warn: $this > 0
+ delay: down 5m multiplier 1.5 max 1h
+ summary: ClickHouse replicated tables in readonly state detected
+ info: ClickHouse has replicated tables in readonly state due to ZooKeeper session loss/startup without ZooKeeper configured
+ to: dba
+
+ template: clickhouse_max_part_count_for_partition
+ on: clickhouse.max_part_count_for_partition
+ class: Workload
+ type: Database
+component: ClickHouse
+ lookup: avg -1m unaligned
+ units: parts
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? (200) : (300))
+ delay: down 5m multiplier 1.5 max 1h
+ summary: ClickHouse high parts/partition detected
+ info: ClickHouse high number of parts per partition
+ to: dba
+
+ template: clickhouse_distributed_connections_failures
+ on: clickhouse.distributed_connections_fail_exhausted_retries
+ class: Error
+ type: Database
+component: ClickHouse
+ lookup: sum -1m unaligned
+ units: failures
+ every: 10s
+ warn: $this > 0
+ delay: down 5m multiplier 1.5 max 1h
+ summary: ClickHouse distributed connections failures detected
+ info: ClickHouse has failed distributed connections after exhausting all retry attempts
+ to: dba
+
+ template: clickhouse_distributed_files_to_insert
+ on: clickhouse.distributed_files_to_insert
+ class: Workload
+ type: Database
+component: ClickHouse
+ lookup: max -1m unaligned
+ units: files
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? (40) : (80))
+ delay: down 5m multiplier 1.5 max 1h
+ summary: ClickHouse high files to insert detected
+ info: ClickHouse high number of pending files to process for asynchronous insertion into Distributed tables
+ to: silent