From 1ee0c09c5742557e037df5421ca62abddb90ae22 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 19 May 2021 14:33:38 +0200 Subject: Merging upstream version 1.31.0. Signed-off-by: Daniel Baumann --- health/health.d/cockroachdb.conf | 170 ++++++++++++++++++++++----------------- 1 file changed, 97 insertions(+), 73 deletions(-) (limited to 'health/health.d/cockroachdb.conf') diff --git a/health/health.d/cockroachdb.conf b/health/health.d/cockroachdb.conf index 47773d04c..dccd2b064 100644 --- a/health/health.d/cockroachdb.conf +++ b/health/health.d/cockroachdb.conf @@ -1,91 +1,115 @@ # Availability -template: cockroachdb_last_collected_secs - on: cockroachdb.live_nodes - calc: $now - $last_collected_t - units: seconds ago - every: 10s - warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every)) - crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every)) - delay: down 5m multiplier 1.5 max 1h - info: number of seconds since the last successful data collection - to: dba + template: cockroachdb_last_collected_secs + on: cockroachdb.live_nodes + class: Database +component: CockroachDB + type: Latency + calc: $now - $last_collected_t + units: seconds ago + every: 10s + warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every)) + crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every)) + delay: down 5m multiplier 1.5 max 1h + info: number of seconds since the last successful data collection + to: dba # Capacity -template: cockroachdb_used_storage_capacity - on: cockroachdb.storage_used_capacity_percentage - calc: $capacity_used_percent - units: % - every: 10s - warn: $this > (($status >= $WARNING) ? (80) : (85)) - crit: $this > (($status == $CRITICAL) ? (85) : (95)) - delay: down 15m multiplier 1.5 max 1h - info: storage capacity utilization - to: dba + template: cockroachdb_used_storage_capacity + on: cockroachdb.storage_used_capacity_percentage + class: Database +component: CockroachDB + type: Utilization + calc: $capacity_used_percent + units: % + every: 10s + warn: $this > (($status >= $WARNING) ? (80) : (85)) + crit: $this > (($status == $CRITICAL) ? (85) : (95)) + delay: down 15m multiplier 1.5 max 1h + info: storage capacity utilization + to: dba -template: cockroachdb_used_usable_storage_capacity - on: cockroachdb.storage_used_capacity_percentage - calc: $capacity_usable_used_percent - units: % - every: 10s - warn: $this > (($status >= $WARNING) ? (80) : (85)) - crit: $this > (($status == $CRITICAL) ? (85) : (95)) - delay: down 15m multiplier 1.5 max 1h - info: storage usable space utilization - to: dba + template: cockroachdb_used_usable_storage_capacity + on: cockroachdb.storage_used_capacity_percentage + class: Database +component: CockroachDB + type: Utilization + calc: $capacity_usable_used_percent + units: % + every: 10s + warn: $this > (($status >= $WARNING) ? (80) : (85)) + crit: $this > (($status == $CRITICAL) ? (85) : (95)) + delay: down 15m multiplier 1.5 max 1h + info: storage usable space utilization + to: dba # Replication -template: cockroachdb_unavailable_ranges - on: cockroachdb.ranges_replication_problem - calc: $ranges_unavailable - units: num - every: 10s - warn: $this > 0 - delay: down 15m multiplier 1.5 max 1h - info: number of ranges with fewer live replicas than the replication target - to: dba + template: cockroachdb_unavailable_ranges + on: cockroachdb.ranges_replication_problem + class: Database +component: CockroachDB + type: Utilization + calc: $ranges_unavailable + units: num + every: 10s + warn: $this > 0 + delay: down 15m multiplier 1.5 max 1h + info: number of ranges with fewer live replicas than the replication target + to: dba -template: cockroachdb_replicas_leaders_not_leaseholders - on: cockroachdb.replicas_leaders - calc: $replicas_leaders_not_leaseholders - units: num - every: 10s - warn: $this > 0 - delay: down 15m multiplier 1.5 max 1h - info: number of replicas that are Raft leaders whose range lease is held by another store - to: dba + template: cockroachdb_replicas_leaders_not_leaseholders + on: cockroachdb.replicas_leaders + class: Database +component: CockroachDB + type: Utilization + calc: $replicas_leaders_not_leaseholders + units: num + every: 10s + warn: $this > 0 + delay: down 15m multiplier 1.5 max 1h + info: number of replicas that are Raft leaders whose range lease is held by another store + to: dba # FD -template: cockroachdb_open_file_descriptors_limit - on: cockroachdb.process_file_descriptors - calc: $sys_fd_open/$sys_fd_softlimit * 100 - units: % - every: 10s - warn: $this > 80 - delay: down 15m multiplier 1.5 max 1h - info: open file descriptors utilization (against softlimit) - to: dba + template: cockroachdb_open_file_descriptors_limit + on: cockroachdb.process_file_descriptors + class: Database +component: CockroachDB + type: Utilization + calc: $sys_fd_open/$sys_fd_softlimit * 100 + units: % + every: 10s + warn: $this > 80 + delay: down 15m multiplier 1.5 max 1h + info: open file descriptors utilization (against softlimit) + to: dba # SQL -template: cockroachdb_sql_active_connections - on: cockroachdb.sql_connections - calc: $sql_conns - units: active connections - every: 10s - info: number of active SQL connections - to: dba + template: cockroachdb_sql_active_connections + on: cockroachdb.sql_connections + class: Database +component: CockroachDB + type: Utilization + calc: $sql_conns + units: active connections + every: 10s + info: number of active SQL connections + to: dba -template: cockroachdb_sql_executed_statements_total_last_5m - on: cockroachdb.sql_statements_total - lookup: sum -5m absolute of sql_query_count - units: statements - every: 10s - warn: $this == 0 AND $cockroachdb_sql_active_connections != 0 - delay: down 15m up 30s multiplier 1.5 max 1h - info: number of executed SQL statements in the last 5 minutes - to: dba + template: cockroachdb_sql_executed_statements_total_last_5m + on: cockroachdb.sql_statements_total + class: Database +component: CockroachDB + type: Workload + lookup: sum -5m absolute of sql_query_count + units: statements + every: 10s + warn: $this == 0 AND $cockroachdb_sql_active_connections != 0 + delay: down 15m up 30s multiplier 1.5 max 1h + info: number of executed SQL statements in the last 5 minutes + to: dba -- cgit v1.2.3