summary | refs | log | tree | commit | diff | stats
path: root/health/health.d/disks.conf
diff options
context:
space:
mode:
Diffstat (limited to 'health/health.d/disks.conf')
-rw-r--r-- health/health.d/disks.conf 126
1 files changed, 69 insertions, 57 deletions
diff --git a/health/health.d/disks.conf b/health/health.d/disks.conf
index d0cd60cfc..60f8faed9 100644
--- a/health/health.d/disks.conf
+++ b/health/health.d/disks.conf
@@ -9,33 +9,39 @@
# raise an alarm if the disk is low on
# available disk space
-template: disk_space_usage
- on: disk.space
- os: linux freebsd
- hosts: *
-families: !/dev !/dev/* !/run !/run/* *
- calc: $used * 100 / ($avail + $used)
- units: %
- every: 1m
- warn: $this > (($status >= $WARNING ) ? (80) : (90))
- crit: $this > (($status == $CRITICAL) ? (90) : (98))
- delay: up 1m down 15m multiplier 1.5 max 1h
- info: disk space utilization
- to: sysadmin
-
-template: disk_inode_usage
- on: disk.inodes
- os: linux freebsd
- hosts: *
-families: !/dev !/dev/* !/run !/run/* *
- calc: $used * 100 / ($avail + $used)
- units: %
- every: 1m
- warn: $this > (($status >= $WARNING) ? (80) : (90))
- crit: $this > (($status == $CRITICAL) ? (90) : (98))
- delay: up 1m down 15m multiplier 1.5 max 1h
- info: disk inode utilization
- to: sysadmin
+ template: disk_space_usage
+ on: disk.space
+ class: System
+component: Disk
+ type: Utilization
+ os: linux freebsd
+ hosts: *
+ families: !/dev !/dev/* !/run !/run/* *
+ calc: $used * 100 / ($avail + $used)
+ units: %
+ every: 1m
+ warn: $this > (($status >= $WARNING ) ? (80) : (90))
+ crit: $this > (($status == $CRITICAL) ? (90) : (98))
+ delay: up 1m down 15m multiplier 1.5 max 1h
+ info: disk $family space utilization
+ to: sysadmin
+
+ template: disk_inode_usage
+ on: disk.inodes
+ class: System
+component: Disk
+ type: Utilization
+ os: linux freebsd
+ hosts: *
+ families: !/dev !/dev/* !/run !/run/* *
+ calc: $used * 100 / ($avail + $used)
+ units: %
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (80) : (90))
+ crit: $this > (($status == $CRITICAL) ? (90) : (98))
+ delay: up 1m down 15m multiplier 1.5 max 1h
+ info: disk $family inode utilization
+ to: sysadmin
# -----------------------------------------------------------------------------
@@ -128,21 +134,24 @@ families: !/dev !/dev/* !/run !/run/* *
# by calculating the average disk utilization
# for the last 10 minutes
-template: 10min_disk_utilization
- on: disk.util
- os: linux freebsd
- hosts: *
-families: *
- lookup: average -10m unaligned
- units: %
- every: 1m
- green: 90
- red: 98
- warn: $this > $green * (($status >= $WARNING) ? (0.7) : (1))
- crit: $this > $red * (($status == $CRITICAL) ? (0.7) : (1))
- delay: down 15m multiplier 1.2 max 1h
- info: average percentage of time the disk was busy over the last 10 minutes
- to: silent
+ template: 10min_disk_utilization
+ on: disk.util
+ class: System
+component: Disk
+ type: Utilization
+ os: linux freebsd
+ hosts: *
+ families: *
+ lookup: average -10m unaligned
+ units: %
+ every: 1m
+ green: 90
+ red: 98
+ warn: $this > $green * (($status >= $WARNING) ? (0.7) : (1))
+ crit: $this > $red * (($status == $CRITICAL) ? (0.7) : (1))
+ delay: down 15m multiplier 1.2 max 1h
+ info: average percentage of time $family disk was busy over the last 10 minutes
+ to: silent
# raise an alarm if the disk backlog
@@ -150,18 +159,21 @@ families: *
# for 10 minutes
# (i.e. the disk cannot catch up)
-template: 10min_disk_backlog
- on: disk.backlog
- os: linux
- hosts: *
-families: *
- lookup: average -10m unaligned
- units: ms
- every: 1m
- green: 2000
- red: 5000
- warn: $this > $green * (($status >= $WARNING) ? (0.7) : (1))
- crit: $this > $red * (($status == $CRITICAL) ? (0.7) : (1))
- delay: down 15m multiplier 1.2 max 1h
- info: average disk backlog size over the last 10 minutes
- to: silent
+ template: 10min_disk_backlog
+ on: disk.backlog
+ class: System
+component: Disk
+ type: Latency
+ os: linux
+ hosts: *
+ families: *
+ lookup: average -10m unaligned
+ units: ms
+ every: 1m
+ green: 2000
+ red: 5000
+ warn: $this > $green * (($status >= $WARNING) ? (0.7) : (1))
+ crit: $this > $red * (($status == $CRITICAL) ? (0.7) : (1))
+ delay: down 15m multiplier 1.2 max 1h
+ info: average backlog size of the $family disk over the last 10 minutes
+ to: silent