summaryrefslogtreecommitdiffstats
path: root/health/health.d/postgres.conf
diff options
context:
space:
mode:
Diffstat (limited to 'health/health.d/postgres.conf')
-rw-r--r--health/health.d/postgres.conf228
1 files changed, 0 insertions, 228 deletions
diff --git a/health/health.d/postgres.conf b/health/health.d/postgres.conf
deleted file mode 100644
index de4c0078e..000000000
--- a/health/health.d/postgres.conf
+++ /dev/null
@@ -1,228 +0,0 @@
-# you can disable an alarm notification by setting the 'to' line to: silent
-
- template: postgres_total_connection_utilization
- on: postgres.connections_utilization
- class: Utilization
- type: Database
-component: PostgreSQL
- hosts: *
- lookup: average -1m unaligned of used
- units: %
- every: 1m
- warn: $this > (($status >= $WARNING) ? (70) : (80))
- crit: $this > (($status == $CRITICAL) ? (80) : (90))
- delay: down 15m multiplier 1.5 max 1h
- summary: PostgreSQL connection utilization
- info: Average total connection utilization over the last minute
- to: dba
-
- template: postgres_acquired_locks_utilization
- on: postgres.locks_utilization
- class: Utilization
- type: Database
-component: PostgreSQL
- hosts: *
- lookup: average -1m unaligned of used
- units: %
- every: 1m
- warn: $this > (($status >= $WARNING) ? (15) : (20))
- delay: down 15m multiplier 1.5 max 1h
- summary: PostgreSQL acquired locks utilization
- info: Average acquired locks utilization over the last minute
- to: dba
-
- template: postgres_txid_exhaustion_perc
- on: postgres.txid_exhaustion_perc
- class: Utilization
- type: Database
-component: PostgreSQL
- hosts: *
- calc: $txid_exhaustion
- units: %
- every: 1m
- warn: $this > 90
- delay: down 15m multiplier 1.5 max 1h
- summary: PostgreSQL TXID exhaustion
- info: Percent towards TXID wraparound
- to: dba
-
-# Database alarms
-
- template: postgres_db_cache_io_ratio
- on: postgres.db_cache_io_ratio
- class: Workload
- type: Database
-component: PostgreSQL
- hosts: *
- lookup: average -1m unaligned of miss
- calc: 100 - $this
- units: %
- every: 1m
- warn: $this < (($status >= $WARNING) ? (70) : (60))
- crit: $this < (($status == $CRITICAL) ? (60) : (50))
- delay: down 15m multiplier 1.5 max 1h
- summary: PostgreSQL DB ${label:database} cache hit ratio
- info: Average cache hit ratio in db ${label:database} over the last minute
- to: dba
-
- template: postgres_db_transactions_rollback_ratio
- on: postgres.db_transactions_ratio
- class: Workload
- type: Database
-component: PostgreSQL
- hosts: *
- lookup: average -5m unaligned of rollback
- units: %
- every: 1m
- warn: $this > (($status >= $WARNING) ? (0) : (2))
- delay: down 15m multiplier 1.5 max 1h
- summary: PostgreSQL DB ${label:database} aborted transactions
- info: Average aborted transactions percentage in db ${label:database} over the last five minutes
- to: dba
-
- template: postgres_db_deadlocks_rate
- on: postgres.db_deadlocks_rate
- class: Errors
- type: Database
-component: PostgreSQL
- hosts: *
- lookup: sum -1m unaligned of deadlocks
- units: deadlocks
- every: 1m
- warn: $this > (($status >= $WARNING) ? (0) : (10))
- delay: down 15m multiplier 1.5 max 1h
- summary: PostgreSQL DB ${label:database} deadlocks rate
- info: Number of deadlocks detected in db ${label:database} in the last minute
- to: dba
-
-# Table alarms
-
- template: postgres_table_cache_io_ratio
- on: postgres.table_cache_io_ratio
- class: Workload
- type: Database
-component: PostgreSQL
- hosts: *
- lookup: average -1m unaligned of miss
- calc: 100 - $this
- units: %
- every: 1m
- warn: $this < (($status >= $WARNING) ? (70) : (60))
- crit: $this < (($status == $CRITICAL) ? (60) : (50))
- delay: down 15m multiplier 1.5 max 1h
- summary: PostgreSQL table ${label:table} db ${label:database} cache hit ratio
- info: Average cache hit ratio in db ${label:database} table ${label:table} over the last minute
- to: dba
-
- template: postgres_table_index_cache_io_ratio
- on: postgres.table_index_cache_io_ratio
- class: Workload
- type: Database
-component: PostgreSQL
- hosts: *
- lookup: average -1m unaligned of miss
- calc: 100 - $this
- units: %
- every: 1m
- warn: $this < (($status >= $WARNING) ? (70) : (60))
- crit: $this < (($status == $CRITICAL) ? (60) : (50))
- delay: down 15m multiplier 1.5 max 1h
- summary: PostgreSQL table ${label:table} db ${label:database} index cache hit ratio
- info: Average index cache hit ratio in db ${label:database} table ${label:table} over the last minute
- to: dba
-
- template: postgres_table_toast_cache_io_ratio
- on: postgres.table_toast_cache_io_ratio
- class: Workload
- type: Database
-component: PostgreSQL
- hosts: *
- lookup: average -1m unaligned of miss
- calc: 100 - $this
- units: %
- every: 1m
- warn: $this < (($status >= $WARNING) ? (70) : (60))
- crit: $this < (($status == $CRITICAL) ? (60) : (50))
- delay: down 15m multiplier 1.5 max 1h
- summary: PostgreSQL table ${label:table} db ${label:database} toast cache hit ratio
- info: Average TOAST hit ratio in db ${label:database} table ${label:table} over the last minute
- to: dba
-
- template: postgres_table_toast_index_cache_io_ratio
- on: postgres.table_toast_index_cache_io_ratio
- class: Workload
- type: Database
-component: PostgreSQL
- hosts: *
- lookup: average -1m unaligned of miss
- calc: 100 - $this
- units: %
- every: 1m
- warn: $this < (($status >= $WARNING) ? (70) : (60))
- crit: $this < (($status == $CRITICAL) ? (60) : (50))
- delay: down 15m multiplier 1.5 max 1h
- summary: PostgreSQL table ${label:table} db ${label:database} index toast hit ratio
- info: average index TOAST hit ratio in db ${label:database} table ${label:table} over the last minute
- to: dba
-
- template: postgres_table_bloat_size_perc
- on: postgres.table_bloat_size_perc
- class: Errors
- type: Database
-component: PostgreSQL
- hosts: *
- calc: ($table_size > (1024 * 1024 * 100)) ? ($bloat) : (0)
- units: %
- every: 1m
- warn: $this > (($status >= $WARNING) ? (60) : (70))
- crit: $this > (($status == $CRITICAL) ? (70) : (80))
- delay: down 15m multiplier 1.5 max 1h
- summary: PostgreSQL table ${label:table} db ${label:database} bloat size
- info: Bloat size percentage in db ${label:database} table ${label:table}
- to: dba
-
- template: postgres_table_last_autovacuum_time
- on: postgres.table_autovacuum_since_time
- class: Errors
- type: Database
-component: PostgreSQL
- hosts: !*
- calc: $time
- units: seconds
- every: 1m
- warn: $this != nan AND $this > (60 * 60 * 24 * 7)
- summary: PostgreSQL table ${label:table} db ${label:database} last autovacuum
- info: Time elapsed since db ${label:database} table ${label:table} was vacuumed by the autovacuum daemon
- to: dba
-
- template: postgres_table_last_autoanalyze_time
- on: postgres.table_autoanalyze_since_time
- class: Errors
- type: Database
-component: PostgreSQL
- hosts: !*
- calc: $time
- units: seconds
- every: 1m
- warn: $this != nan AND $this > (60 * 60 * 24 * 7)
- summary: PostgreSQL table ${label:table} db ${label:database} last autoanalyze
- info: Time elapsed since db ${label:database} table ${label:table} was analyzed by the autovacuum daemon
- to: dba
-
-# Index alarms
-
- template: postgres_index_bloat_size_perc
- on: postgres.index_bloat_size_perc
- class: Errors
- type: Database
-component: PostgreSQL
- hosts: *
- calc: ($index_size > (1024 * 1024 * 10)) ? ($bloat) : (0)
- units: %
- every: 1m
- warn: $this > (($status >= $WARNING) ? (60) : (70))
- crit: $this > (($status == $CRITICAL) ? (70) : (80))
- delay: down 15m multiplier 1.5 max 1h
- summary: PostgreSQL table ${label:table} db ${label:database} index bloat size
- info: Bloat size percentage in db ${label:database} table ${label:table} index ${label:index}
- to: dba