summaryrefslogtreecommitdiffstats
path: root/health/health.d/cockroachdb.conf
diff options
context:
space:
mode:
Diffstat (limited to 'health/health.d/cockroachdb.conf')
-rw-r--r--health/health.d/cockroachdb.conf170
1 files changed, 97 insertions, 73 deletions
diff --git a/health/health.d/cockroachdb.conf b/health/health.d/cockroachdb.conf
index 47773d04c..dccd2b064 100644
--- a/health/health.d/cockroachdb.conf
+++ b/health/health.d/cockroachdb.conf
@@ -1,91 +1,115 @@
# Availability
-template: cockroachdb_last_collected_secs
- on: cockroachdb.live_nodes
- calc: $now - $last_collected_t
- units: seconds ago
- every: 10s
- warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
- crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
- delay: down 5m multiplier 1.5 max 1h
- info: number of seconds since the last successful data collection
- to: dba
+ template: cockroachdb_last_collected_secs
+ on: cockroachdb.live_nodes
+ class: Database
+component: CockroachDB
+ type: Latency
+ calc: $now - $last_collected_t
+ units: seconds ago
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
+ delay: down 5m multiplier 1.5 max 1h
+ info: number of seconds since the last successful data collection
+ to: dba
# Capacity
-template: cockroachdb_used_storage_capacity
- on: cockroachdb.storage_used_capacity_percentage
- calc: $capacity_used_percent
- units: %
- every: 10s
- warn: $this > (($status >= $WARNING) ? (80) : (85))
- crit: $this > (($status == $CRITICAL) ? (85) : (95))
- delay: down 15m multiplier 1.5 max 1h
- info: storage capacity utilization
- to: dba
+ template: cockroachdb_used_storage_capacity
+ on: cockroachdb.storage_used_capacity_percentage
+ class: Database
+component: CockroachDB
+ type: Utilization
+ calc: $capacity_used_percent
+ units: %
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? (80) : (85))
+ crit: $this > (($status == $CRITICAL) ? (85) : (95))
+ delay: down 15m multiplier 1.5 max 1h
+ info: storage capacity utilization
+ to: dba
-template: cockroachdb_used_usable_storage_capacity
- on: cockroachdb.storage_used_capacity_percentage
- calc: $capacity_usable_used_percent
- units: %
- every: 10s
- warn: $this > (($status >= $WARNING) ? (80) : (85))
- crit: $this > (($status == $CRITICAL) ? (85) : (95))
- delay: down 15m multiplier 1.5 max 1h
- info: storage usable space utilization
- to: dba
+ template: cockroachdb_used_usable_storage_capacity
+ on: cockroachdb.storage_used_capacity_percentage
+ class: Database
+component: CockroachDB
+ type: Utilization
+ calc: $capacity_usable_used_percent
+ units: %
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? (80) : (85))
+ crit: $this > (($status == $CRITICAL) ? (85) : (95))
+ delay: down 15m multiplier 1.5 max 1h
+ info: storage usable space utilization
+ to: dba
# Replication
-template: cockroachdb_unavailable_ranges
- on: cockroachdb.ranges_replication_problem
- calc: $ranges_unavailable
- units: num
- every: 10s
- warn: $this > 0
- delay: down 15m multiplier 1.5 max 1h
- info: number of ranges with fewer live replicas than the replication target
- to: dba
+ template: cockroachdb_unavailable_ranges
+ on: cockroachdb.ranges_replication_problem
+ class: Database
+component: CockroachDB
+ type: Utilization
+ calc: $ranges_unavailable
+ units: num
+ every: 10s
+ warn: $this > 0
+ delay: down 15m multiplier 1.5 max 1h
+ info: number of ranges with fewer live replicas than the replication target
+ to: dba
-template: cockroachdb_replicas_leaders_not_leaseholders
- on: cockroachdb.replicas_leaders
- calc: $replicas_leaders_not_leaseholders
- units: num
- every: 10s
- warn: $this > 0
- delay: down 15m multiplier 1.5 max 1h
- info: number of replicas that are Raft leaders whose range lease is held by another store
- to: dba
+ template: cockroachdb_replicas_leaders_not_leaseholders
+ on: cockroachdb.replicas_leaders
+ class: Database
+component: CockroachDB
+ type: Utilization
+ calc: $replicas_leaders_not_leaseholders
+ units: num
+ every: 10s
+ warn: $this > 0
+ delay: down 15m multiplier 1.5 max 1h
+ info: number of replicas that are Raft leaders whose range lease is held by another store
+ to: dba
# FD
-template: cockroachdb_open_file_descriptors_limit
- on: cockroachdb.process_file_descriptors
- calc: $sys_fd_open/$sys_fd_softlimit * 100
- units: %
- every: 10s
- warn: $this > 80
- delay: down 15m multiplier 1.5 max 1h
- info: open file descriptors utilization (against softlimit)
- to: dba
+ template: cockroachdb_open_file_descriptors_limit
+ on: cockroachdb.process_file_descriptors
+ class: Database
+component: CockroachDB
+ type: Utilization
+ calc: $sys_fd_open/$sys_fd_softlimit * 100
+ units: %
+ every: 10s
+ warn: $this > 80
+ delay: down 15m multiplier 1.5 max 1h
+ info: open file descriptors utilization (against softlimit)
+ to: dba
# SQL
-template: cockroachdb_sql_active_connections
- on: cockroachdb.sql_connections
- calc: $sql_conns
- units: active connections
- every: 10s
- info: number of active SQL connections
- to: dba
+ template: cockroachdb_sql_active_connections
+ on: cockroachdb.sql_connections
+ class: Database
+component: CockroachDB
+ type: Utilization
+ calc: $sql_conns
+ units: active connections
+ every: 10s
+ info: number of active SQL connections
+ to: dba
-template: cockroachdb_sql_executed_statements_total_last_5m
- on: cockroachdb.sql_statements_total
- lookup: sum -5m absolute of sql_query_count
- units: statements
- every: 10s
- warn: $this == 0 AND $cockroachdb_sql_active_connections != 0
- delay: down 15m up 30s multiplier 1.5 max 1h
- info: number of executed SQL statements in the last 5 minutes
- to: dba
+ template: cockroachdb_sql_executed_statements_total_last_5m
+ on: cockroachdb.sql_statements_total
+ class: Database
+component: CockroachDB
+ type: Workload
+ lookup: sum -5m absolute of sql_query_count
+ units: statements
+ every: 10s
+ warn: $this == 0 AND $cockroachdb_sql_active_connections != 0
+ delay: down 15m up 30s multiplier 1.5 max 1h
+ info: number of executed SQL statements in the last 5 minutes
+ to: dba