diff options
Diffstat (limited to 'health/health.d/postgres.conf')
-rw-r--r-- | health/health.d/postgres.conf | 228 |
1 files changed, 0 insertions, 228 deletions
diff --git a/health/health.d/postgres.conf b/health/health.d/postgres.conf deleted file mode 100644 index de4c0078e..000000000 --- a/health/health.d/postgres.conf +++ /dev/null @@ -1,228 +0,0 @@ -# you can disable an alarm notification by setting the 'to' line to: silent - - template: postgres_total_connection_utilization - on: postgres.connections_utilization - class: Utilization - type: Database -component: PostgreSQL - hosts: * - lookup: average -1m unaligned of used - units: % - every: 1m - warn: $this > (($status >= $WARNING) ? (70) : (80)) - crit: $this > (($status == $CRITICAL) ? (80) : (90)) - delay: down 15m multiplier 1.5 max 1h - summary: PostgreSQL connection utilization - info: Average total connection utilization over the last minute - to: dba - - template: postgres_acquired_locks_utilization - on: postgres.locks_utilization - class: Utilization - type: Database -component: PostgreSQL - hosts: * - lookup: average -1m unaligned of used - units: % - every: 1m - warn: $this > (($status >= $WARNING) ? (15) : (20)) - delay: down 15m multiplier 1.5 max 1h - summary: PostgreSQL acquired locks utilization - info: Average acquired locks utilization over the last minute - to: dba - - template: postgres_txid_exhaustion_perc - on: postgres.txid_exhaustion_perc - class: Utilization - type: Database -component: PostgreSQL - hosts: * - calc: $txid_exhaustion - units: % - every: 1m - warn: $this > 90 - delay: down 15m multiplier 1.5 max 1h - summary: PostgreSQL TXID exhaustion - info: Percent towards TXID wraparound - to: dba - -# Database alarms - - template: postgres_db_cache_io_ratio - on: postgres.db_cache_io_ratio - class: Workload - type: Database -component: PostgreSQL - hosts: * - lookup: average -1m unaligned of miss - calc: 100 - $this - units: % - every: 1m - warn: $this < (($status >= $WARNING) ? (70) : (60)) - crit: $this < (($status == $CRITICAL) ? (60) : (50)) - delay: down 15m multiplier 1.5 max 1h - summary: PostgreSQL DB ${label:database} cache hit ratio - info: Average cache hit ratio in db ${label:database} over the last minute - to: dba - - template: postgres_db_transactions_rollback_ratio - on: postgres.db_transactions_ratio - class: Workload - type: Database -component: PostgreSQL - hosts: * - lookup: average -5m unaligned of rollback - units: % - every: 1m - warn: $this > (($status >= $WARNING) ? (0) : (2)) - delay: down 15m multiplier 1.5 max 1h - summary: PostgreSQL DB ${label:database} aborted transactions - info: Average aborted transactions percentage in db ${label:database} over the last five minutes - to: dba - - template: postgres_db_deadlocks_rate - on: postgres.db_deadlocks_rate - class: Errors - type: Database -component: PostgreSQL - hosts: * - lookup: sum -1m unaligned of deadlocks - units: deadlocks - every: 1m - warn: $this > (($status >= $WARNING) ? (0) : (10)) - delay: down 15m multiplier 1.5 max 1h - summary: PostgreSQL DB ${label:database} deadlocks rate - info: Number of deadlocks detected in db ${label:database} in the last minute - to: dba - -# Table alarms - - template: postgres_table_cache_io_ratio - on: postgres.table_cache_io_ratio - class: Workload - type: Database -component: PostgreSQL - hosts: * - lookup: average -1m unaligned of miss - calc: 100 - $this - units: % - every: 1m - warn: $this < (($status >= $WARNING) ? (70) : (60)) - crit: $this < (($status == $CRITICAL) ? (60) : (50)) - delay: down 15m multiplier 1.5 max 1h - summary: PostgreSQL table ${label:table} db ${label:database} cache hit ratio - info: Average cache hit ratio in db ${label:database} table ${label:table} over the last minute - to: dba - - template: postgres_table_index_cache_io_ratio - on: postgres.table_index_cache_io_ratio - class: Workload - type: Database -component: PostgreSQL - hosts: * - lookup: average -1m unaligned of miss - calc: 100 - $this - units: % - every: 1m - warn: $this < (($status >= $WARNING) ? (70) : (60)) - crit: $this < (($status == $CRITICAL) ? (60) : (50)) - delay: down 15m multiplier 1.5 max 1h - summary: PostgreSQL table ${label:table} db ${label:database} index cache hit ratio - info: Average index cache hit ratio in db ${label:database} table ${label:table} over the last minute - to: dba - - template: postgres_table_toast_cache_io_ratio - on: postgres.table_toast_cache_io_ratio - class: Workload - type: Database -component: PostgreSQL - hosts: * - lookup: average -1m unaligned of miss - calc: 100 - $this - units: % - every: 1m - warn: $this < (($status >= $WARNING) ? (70) : (60)) - crit: $this < (($status == $CRITICAL) ? (60) : (50)) - delay: down 15m multiplier 1.5 max 1h - summary: PostgreSQL table ${label:table} db ${label:database} toast cache hit ratio - info: Average TOAST hit ratio in db ${label:database} table ${label:table} over the last minute - to: dba - - template: postgres_table_toast_index_cache_io_ratio - on: postgres.table_toast_index_cache_io_ratio - class: Workload - type: Database -component: PostgreSQL - hosts: * - lookup: average -1m unaligned of miss - calc: 100 - $this - units: % - every: 1m - warn: $this < (($status >= $WARNING) ? (70) : (60)) - crit: $this < (($status == $CRITICAL) ? (60) : (50)) - delay: down 15m multiplier 1.5 max 1h - summary: PostgreSQL table ${label:table} db ${label:database} index toast hit ratio - info: average index TOAST hit ratio in db ${label:database} table ${label:table} over the last minute - to: dba - - template: postgres_table_bloat_size_perc - on: postgres.table_bloat_size_perc - class: Errors - type: Database -component: PostgreSQL - hosts: * - calc: ($table_size > (1024 * 1024 * 100)) ? ($bloat) : (0) - units: % - every: 1m - warn: $this > (($status >= $WARNING) ? (60) : (70)) - crit: $this > (($status == $CRITICAL) ? (70) : (80)) - delay: down 15m multiplier 1.5 max 1h - summary: PostgreSQL table ${label:table} db ${label:database} bloat size - info: Bloat size percentage in db ${label:database} table ${label:table} - to: dba - - template: postgres_table_last_autovacuum_time - on: postgres.table_autovacuum_since_time - class: Errors - type: Database -component: PostgreSQL - hosts: !* - calc: $time - units: seconds - every: 1m - warn: $this != nan AND $this > (60 * 60 * 24 * 7) - summary: PostgreSQL table ${label:table} db ${label:database} last autovacuum - info: Time elapsed since db ${label:database} table ${label:table} was vacuumed by the autovacuum daemon - to: dba - - template: postgres_table_last_autoanalyze_time - on: postgres.table_autoanalyze_since_time - class: Errors - type: Database -component: PostgreSQL - hosts: !* - calc: $time - units: seconds - every: 1m - warn: $this != nan AND $this > (60 * 60 * 24 * 7) - summary: PostgreSQL table ${label:table} db ${label:database} last autoanalyze - info: Time elapsed since db ${label:database} table ${label:table} was analyzed by the autovacuum daemon - to: dba - -# Index alarms - - template: postgres_index_bloat_size_perc - on: postgres.index_bloat_size_perc - class: Errors - type: Database -component: PostgreSQL - hosts: * - calc: ($index_size > (1024 * 1024 * 10)) ? ($bloat) : (0) - units: % - every: 1m - warn: $this > (($status >= $WARNING) ? (60) : (70)) - crit: $this > (($status == $CRITICAL) ? (70) : (80)) - delay: down 15m multiplier 1.5 max 1h - summary: PostgreSQL table ${label:table} db ${label:database} index bloat size - info: Bloat size percentage in db ${label:database} table ${label:table} index ${label:index} - to: dba |